プロ野球の選手会のページから球団名と選手一覧のPDFのURLをゲットしてくる。
※個人利用だけどもし問題がある場合は誰か教えてください。
getURL.py
import bs4
import requests
# Link texts of the twelve clubs whose entries should be reported, written
# exactly as they are matched against the anchor text on the page.
TEAM_NAMES = frozenset({
    "ロッテ", "ソフトバンク", "西武", "楽天", "オリックス", "日本ハム",
    "広島", "阪神", "DeNA", "巨人", "中日", "ヤクルト",
})

# Fetch the register page. NOTE: `url` holds the Response object, not a URL
# string; the name is kept so any later code relying on it keeps working.
# The timeout prevents the script from hanging forever if the server stalls.
url = requests.get('http://jpbpa.net/register/', timeout=30)
# Abort with requests.HTTPError on a 4xx/5xx response instead of parsing an
# error page.
url.raise_for_status()
soup = bs4.BeautifulSoup(url.text, "html.parser")

# Every anchor on the page; only those whose text is a team name are printed.
elems = soup.select('a')
for elem in elems:
    # Link text is the team name; the href is the path of that team's PDF.
    team_name = elem.getText()
    if team_name in TEAM_NAMES:
        print('{}({})'.format(team_name, elem.get('href')))