日銀統計データ取得

Python

Last updated at 2024-09-26 / Posted at 2024-09-26

日銀のデータは下記のサイトから取得できるが、API等エンジニアが扱いやすい形では整備されておらず、通常は手作業で取得する必要がある。そこで、これら全てのデータを整形し、扱いやすい形式にまとめたい。
https://www.stat-search.boj.or.jp/ssi/cgi-bin/famecgi2?cgi=$nme_s060

下記は公開データ一覧
https://www.stat-search.boj.or.jp/info/statistics_menu_list_j.pdf

本記事ではとりあえずサイトの構造を理解するためにもデータを1つseleniumで取得してみたのでコードを記載します。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Path to the ChromeDriver executable
driver_path = '/usr/local/bin/chromedriver'

# Maximum number of seconds to wait for each element or window to appear
WAIT_SECONDS = 10


def _click_when_clickable(driver, xpath, message):
    """Wait until the element at *xpath* is clickable, click it, then print *message*."""
    element = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )
    element.click()
    print(message)


def _switch_to_new_window(driver, known_handles, expected_count):
    """Switch to the window that opened after *known_handles* was captured.

    Waits until *expected_count* windows exist, then selects the handle that
    is not in *known_handles*. The handle is looked up by difference rather
    than by list position because the order of ``window_handles`` is not
    guaranteed.

    Returns the handle of the window that was switched to.
    Raises RuntimeError if no previously unseen handle exists.
    """
    WebDriverWait(driver, WAIT_SECONDS).until(
        EC.number_of_windows_to_be(expected_count)
    )
    new_handles = [h for h in driver.window_handles if h not in known_handles]
    if not new_handles:
        raise RuntimeError("no new window handle was found")
    driver.switch_to.window(new_handles[0])
    return new_handles[0]


# Initialize the WebDriver
service = Service(driver_path)
driver = webdriver.Chrome(service=service)

try:
    # Open the target page. This is inside the try block so that the browser
    # is still shut down by the finally clause if navigation fails.
    driver.get('https://www.stat-search.boj.or.jp/ssi/cgi-bin/famecgi2?cgi=$nme_a000&lstSelection=FM08')

    # Click the expand button
    _click_when_clickable(
        driver,
        '//*[@id="menuSearchTabpanel"]/div[2]/div[1]/div[2]/input',
        "展開ボタンをクリックしました。",
    )

    # Click the checkbox that selects every data series
    _click_when_clickable(
        driver,
        '//*[@id="menuSearchTabpanel"]/div[2]/div[2]/div[1]/label',
        "全てのデータ系列を選択しました。",
    )

    # Click the "add to extraction conditions" button
    _click_when_clickable(
        driver,
        '//*[@id="menuSearchTabpanel"]/div[2]/div[2]/div[4]/a',
        "抽出条件に追加ボタンをクリックしました。",
    )

    # Click the extract button (a new window opens); remember the handles
    # that exist beforehand so the new window can be identified afterwards.
    known_handles = set(driver.window_handles)
    _click_when_clickable(
        driver,
        '//*[@id="resultArea"]/div[4]/ul/li[1]/a',
        "抽出ボタンをクリックしました。",
    )

    # Wait for the new window and switch to it
    window_after = _switch_to_new_window(driver, known_handles, 2)

    # Click the download button (yet another window opens)
    known_handles = set(driver.window_handles)
    _click_when_clickable(
        driver,
        '/html/body/div[2]/div/div[2]/table/tbody/tr[2]/td[5]/a',
        "ダウンロードボタンをクリックしました。",
    )

    # Wait for the third window, which holds the CSV link, and switch to it
    window_csv = _switch_to_new_window(driver, known_handles, 3)

    # Locate the link to the CSV file
    csv_link = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div/div/div/table/tbody/tr/td/a'))
    )

    # Print the link URL to the terminal
    csv_url = csv_link.get_attribute('href')
    print(f"CSVファイルのリンク: {csv_url}")

except Exception as e:
    # Top-level boundary of the script: report the failure instead of crashing
    print(f"エラーが発生しました: {e}")
finally:
    # Always shut down the WebDriver so no browser process is left behind
    driver.quit()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up