2
2

More than 3 years have passed since last update.

音楽チャートをスクレイピングした(ほぼ日記)

Posted at

ただコードをおいておくためだけに。

import pandas as pd
import time
from selenium import webdriver
from datetime import date
from datetime import timedelta

# Scrape the Billboard Japan sales chart at 28-day steps, walking backwards
# from 2020-10-05 for at most 60 steps, and save the collected entries as CSV.
#
# NOTE(review): passing the driver path positionally and the
# find_elements_by_* helpers are Selenium 3 style APIs; newer Selenium
# releases dropped them. Left unchanged because the installed version is not
# visible from this script -- confirm before upgrading.
driver = webdriver.Chrome("****/chromedriver")
driver.implicitly_wait(30)

url = "http://www.billboard-japan.com/charts/detail?a=sales&"
columns = ["year", "title", "artist", "num"]
rows = []  # one dict per chart entry; turned into a DataFrame once at the end
try:
    for i in range(60):
        d = date(2020, 10, 5) - timedelta(days=28 * i)
        # The month is sent zero-padded, the day is not (e.g. "day=5").
        day = d.strftime("year=%Y&month=%m&day=") + str(d.day)
        driver.get(url + day)
        # NOTE(review): "musuc_title" looks like a typo, but it is kept as-is
        # on the assumption that it matches the site's markup -- confirm.
        title = driver.find_elements_by_class_name("musuc_title")
        artist = driver.find_elements_by_class_name("artist_name")
        num = driver.find_elements_by_class_name("num")
        if not num:
            # No chart entries for this date: stop walking further back.
            break
        stamp = d.strftime("%Y/%m/%d")
        # Take at most 50 entries per page. zip() stops at the shortest list,
        # so a page with fewer matches no longer raises IndexError.
        for t, a, n in zip(title[:50], artist[:50], num[:50]):
            rows.append(
                {"year": stamp, "title": t.text, "artist": a.text, "num": n.text}
            )
        print(">>> " + stamp + " 読み込み完了")
        time.sleep(1)  # pause between page loads
finally:
    # Always shut the browser down, even if a page load or lookup fails.
    driver.quit()

data = pd.DataFrame(rows, columns=columns)
data.to_csv("****/data.csv")
2
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
2
2