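# Python script (Selenium + lxml): looks up each code from codes1804.csv on the Daiwa site and pickles the scraped rows.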

from selenium import webdriver
import lxml.html
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from time import sleep
import pickle

# Path handed to webdriver.Chrome() to locate the chromedriver executable.
driver_path = "chromedriver"
# Login page that the browser opens first.
url = "https://www.daiwa.co.jp/PCC/HomeTrade/Account/m8301.html"

# Login credentials typed into the form fields '@PM-1@', '@PM-2@' and '@PM-3@'
# respectively. Left blank here; fill them in before running.
# NOTE(review): `shiten` is presumably the branch code -- confirm.
shiten = ""
account = ""
password = ""

# Load the code list and keep only the rows whose '33業種コード' column is not
# the placeholder '-'; the 'コード' column of what remains is the work list.
csv = pd.read_csv('codes1804.csv')
_has_sector_code = csv['33業種コード'] != '-'
csv = csv[_has_sector_code]
codes = csv['コード']

# Start Chrome, log in, and click through to the page holding the code
# lookup form used by the scraping loop.
driver = webdriver.Chrome(driver_path)
driver.get(url)
# The implicit wait is a session-wide element-lookup timeout, not a pause:
# setting it once covers every find_element_* call that follows.
driver.implicitly_wait(5)

# Fill in the three login fields and submit the form.
driver.find_element_by_name('@PM-1@').send_keys(shiten)
driver.find_element_by_name('@PM-2@').send_keys(account)
driver.find_element_by_name('@PM-3@').send_keys(password)
driver.find_element_by_xpath("//input[@type='submit']").click()

# Follow the 'マーケット' link; the menu items clicked next live inside the
# 'qcmain' frame, so switch into it first.
driver.find_element_by_link_text('マーケット').click()
# switch_to.frame() replaces the deprecated switch_to_frame() alias.
driver.switch_to.frame('qcmain')
driver.find_element_by_id('qcdw_menu23').click()
driver.find_element_by_id('qcdw_menu34').click()

# Resume support: load the rows saved by an earlier run so that new rows are
# appended to them. Resetting the list here would make the final dump
# overwrite the file with only the newly scraped rows.
# NOTE: pickle must only be used on files this script wrote itself.
try:
    with open('shikiho_1803.txt', 'rb') as saved_file:
        shikiho = pickle.load(saved_file)
except FileNotFoundError:
    # No earlier results on disk: start empty and process every code.
    shikiho = []

if shikiho:
    # Every saved row starts with its code, so only codes greater than the
    # last saved one still need to be fetched.
    # NOTE(review): assumes codes are processed in ascending order -- confirm.
    codes = codes[codes > shikiho[-1][0]]

# XPath matching the profile tables on a result page.
_PROFILE_TABLE = '//table[@class="tblcol tbltp1 tblhover5lines"]'
# Header labels compared against the <th> of row 7 to decide which two rows to
# read. NOTE(review): these look like head-office labels -- confirm.
_ROW7_LABELS = ('【本店】', '【本社】', '【本部】', '【持株会社本社】')


def _cell_text(root, row, tag):
    """Return the text of the first `tag` cell ('th' or 'td') in table row `row`.

    The row is looked up under the profile table selected by the `[2]`
    positional predicate. Raises IndexError when no such cell exists.
    """
    cells = root.xpath(_PROFILE_TABLE + '[2]/tbody/tr[' + str(row) + ']/' + tag)
    return cells[0].text_content()


def _clean_label(text):
    """Drop ideographic spaces, the brackets 【 】 and surrounding whitespace."""
    return text.replace('\u3000', '').replace('【', '').replace('】', '').strip()


# For every code: submit it through the lookup form, parse the result page and
# append [code, label1, value1, label2, value2] to `shikiho`.
for code in tqdm(codes):
    code_box = driver.find_element_by_name('txtCode')
    code_box.clear()
    code_box.send_keys(str(code))
    driver.find_element_by_name('submit').click()
    sleep(3)  # fixed pause, presumably to let the result page finish loading

    root = lxml.html.fromstring(driver.page_source)
    if not root.xpath(_PROFILE_TABLE):
        # No profile table on the page for this code: skip it.
        continue

    # text_content is a method and must be called; comparing the bound method
    # itself to the labels is always False and would force i = 6 every time.
    if _cell_text(root, 7, 'th').strip() in _ROW7_LABELS:
        i = 5
    else:
        i = 6

    # Collect header and value from rows i and i + 1.
    add = [code]
    for row in (i, i + 1):
        add.append(_clean_label(_cell_text(root, row, 'th')))
        add.append(_cell_text(root, row, 'td').strip())
    shikiho.append(add)
print('finished!')
# quit() ends the whole WebDriver session (browser and driver process);
# close() would only close the current window.
driver.quit()

# Persist the collected rows, then read them back from disk. Context managers
# guarantee the file is flushed and closed before it is reopened.
with open('shikiho_1803.txt', 'wb') as out_file:
    pickle.dump(shikiho, out_file)
with open('shikiho_1803.txt', 'rb') as in_file:
    shikiho = pickle.load(in_file)