Python csv出力が上手くいきません。
概要
python初心者です。
下記のコードをサイトを見ながら見よう見まねでトライしたのですが、
添付している画像のようにcsvに上手く出力されません。
エラーは出ておりませんが、どこがおかしいのかをご教授いただけますと幸いです。
該当するソースコード
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from urllib import request
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import datetime
import time
import requests
import csv
import pandas as pd
START_DT_STR = '2021-12-01'
SEARCH_WORD = 'python'
PRTIMES_URL = 'https://prtimes.jp/'
start_dt = datetime.datetime.strptime(START_DT_STR, '%Y-%m-%d')
options=Options()
options.add_argument("--headless")
driver=webdriver.Chrome("/Users/seiya.shibata/Desktop/Python/chromedriver",options=options)
driver.get("https://www.google.com/")
driver = webdriver.Chrome('chromedriver',options=options)
#PR TIMESのトップページを開く
target_url = 'https://prtimes.jp/'
driver.get(target_url)
driver.find_element("xpath",'/html/body/header/div/div[2]/div/input').click()
kensaku = driver.find_element("xpath",'/html/body/header/div/div[2]/div/input')
kensaku.send_keys(SEARCH_WORD)
kensaku.send_keys(Keys.ENTER)
cnt = 0
while True:
try:
driver.find_element_by_xpath("/html/body/main/section/section/div/div/a").click()
except:
break
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")
#記事URLを取得(40件ずつ処理)
articles = soup.find_all(class_='list-article__link')[cnt*40:]
#記事情報を格納する配列
#記事ごとの情報を取得
for article in articles:
article_time = article.find(class_='list-article__time')
#csv関連
eof_flag = False
csv_date=datetime.datetime.today().strftime("%Y%m%d")
csv_file_name = 'prtimes_' + csv_date + '.csv'
f = open(csv_file_name, 'w', encoding='cp932',errors="ignore")
writer=csv.writer(f, lineterminator='\n')
csv_header=["title","sub_title","company","pubulished","category1"]
writer.writerow(csv_header)
try:
str_to_dt = datetime.datetime.strptime(article_time.get('datetime'), '%Y-%m-%dT%H:%M:%S%z')
except:
try:
article_time_cvt = article_time.get('datetime').replace('+09:00', '+0900')
str_to_dt = datetime.datetime.strptime(article_time_cvt, '%Y-%m-%dT%H:%M:%S%z')
except:
str_to_dt = datetime.datetime.strptime(article_time.text, '%Y年%m月%d日 %H時%M分')
article_time_dt = datetime.datetime(str_to_dt.year, str_to_dt.month, str_to_dt.day, str_to_dt.hour, str_to_dt.minute)
if article_time_dt < start_dt:
eof_flag = True
break
relative_href = article["href"]
url = urljoin(target_url, relative_href)
r = requests.get(url)
html = r.text
soup = BeautifulSoup(html, "html.parser")
records = []
#記事タイトル
title = soup.select_one("#main > div.content > article > div > header > h1").text
sub_title_elem = soup.select_one("#main > div.content > article > div > header > h2")
#サブタイトル
if sub_title_elem:
sub_title = sub_title_elem.text
else:
sub_title = ""
company = soup.select_one('#main > div.content > article > div > header > div.release--info_wrapper > div.information-release > div').text
published = soup.select_one('#main > div.content > article > div > header > div.release--info_wrapper > div.information-release > time').text
category1= soup.select_one('#main > div.content > article > dl > dd:nth-child(4) > a:nth-child(1)').text
records.append({'title':title,'sub_title':sub_title,'company':company,'published':published,'category1':category1})
writer.writerow(records)
if records:
pass
if eof_flag:
break
time.sleep(2)
cnt += 1
f.close
0