Developers.IOの【2021年】AWS全サービスまとめをBS4で一覧にする
Developers.IOの記事「【2021年】AWS全サービスまとめ」から、Beautiful Soupを利用してカテゴリ名・サービス名・概要を抜き出し、一覧（CSV）にする方法です。
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
def _clean_service_name(raw: str) -> str:
    """Normalize the text of an ``<h3>`` heading into a bare service name.

    Applies, in order: removal of CR/LF characters, stripping of leading
    whitespace, stripping of trailing spaces, and removal of a ``[...]``
    bracketed annotation together with the whitespace around it.

    Args:
        raw: Heading text as returned by Beautiful Soup.

    Returns:
        The cleaned name; may be an empty string.
    """
    # Raw strings keep the regex escapes (\s, \[) away from Python's own
    # string-escape processing, which warns on unknown escapes.
    name = re.sub(r'[\r\n]', '', raw)
    name = re.sub(r'^[\s]+', '', name)
    name = re.sub(r'[ ]+$', '', name)
    return re.sub(r'\s*\[.+\]\s*', '', name)


# Fetch the article. requests applies no timeout by default, so set one to
# avoid hanging forever, and fail loudly on 4xx/5xx instead of parsing an
# error page into an empty result.
res = requests.get(
    'https://dev.classmethod.jp/articles/aws-summary-2021/',
    timeout=30,
)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

# One dict per service: category_name / service_name / desc.
all_services = []
categories = soup.find_all('h2')
for category in categories:
    category_name = category.text
    # Stop at the 'まとめ' ("summary") heading; it and every <h2> after it
    # are not processed.
    if category_name == 'まとめ':
        break
    for el in category.next_siblings:
        # The next <h2> starts another category.
        if el.name == 'h2':
            break
        if el.name != 'h3':
            continue
        service_name = _clean_service_name(el.text)
        # Skip empty headings and the 'EVENT' / 'Session Summary:' ones
        # (presumably not service entries -- confirm against the page).
        if not service_name or service_name.startswith(
                ('EVENT', 'Session Summary:')):
            continue
        # Three document-order hops from the <h3>. NOTE(review): this
        # presumably lands on the element holding the description
        # (heading text -> whitespace -> next tag); it depends on the
        # article's exact markup -- verify if the layout changes.
        desc_el = el.next_element.next_element.next_element
        desc = desc_el.text
        all_services.append({
            'category_name': category_name,
            'service_name': service_name,
            'desc': desc,
        })

print(len(all_services))
all_services_df = pd.DataFrame(all_services)
print(all_services_df)
# 'ms932' is a Python codec alias for cp932 (Windows Shift_JIS); characters
# that cp932 cannot represent raise UnicodeEncodeError here.
all_services_df.to_csv('aws_all_services.csv', encoding='ms932')