LoginSignup
2
4

More than 5 years have passed since last update.

クローラー、スクレイピング用tips(selenium,lxml)

Last updated at Posted at 2018-08-15
selenium.py
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


# Set to True to run Chrome without a visible window (headless mode).
# Exactly one driver is created: building a headless driver and then a regular
# one back to back would launch two browsers and orphan the first.
HEADLESS = False

options = Options()
if HEADLESS:
    options.add_argument('--headless')
    # Optional: pin the viewport size when there is no real window.
    # options.add_argument('--window-size=1440,900')

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options)

try:
    driver.maximize_window()
    # Wait up to 30 seconds for an element to appear before a lookup fails.
    driver.implicitly_wait(30)

    driver.get("任意のURL")
    # find_element(By.<strategy>, value) replaces the find_element_by_*
    # helpers, which were removed in Selenium 4.3.
    driver.find_element(By.NAME, 'hoge').send_keys('hoge')
    driver.find_element(By.CSS_SELECTOR, 'hoge').click()
    driver.find_element(By.CSS_SELECTOR, 'hoge').get_attribute('hoge')
finally:
    # Always shut the browser down so no Chrome/chromedriver process leaks.
    driver.quit()
lxml.py
import lxml.html
from urllib.request import urlopen
from lxml import etree


# Fetch and parse the page; the `with` block closes the HTTP response once
# lxml has finished reading it.
with urlopen("任意のURL") as response:
    tree = lxml.html.parse(response)
html = tree.getroot()

# Text of the first <h1> element on the page.
html.cssselect("h1")[0].text

# Serialize the whole parsed document back to an HTML string
# (UTF-8 so that sites containing Japanese text round-trip correctly).
lxml.html.tostring(tree, encoding="utf-8").decode()


selenium_and_lxml.py
# Load the page with Selenium so that its JavaScript has run.
driver = webdriver.Chrome()
try:
    driver.get("https://hoge/")
    # Take driver.page_source and hand it to lxml for parsing.
    html = lxml.html.fromstring(driver.page_source)
finally:
    # Close the browser even if loading or parsing fails.
    driver.quit()

title = html.cssselect("section#coordinate_info h1")[0].text
image = html.cssselect("div#coordinate_img img")[0].get("src")  # read an attribute
print(title)
print(image)
2
4
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
4