LoginSignup
1
0

More than 5 years have passed since last update.

CrowdWorks の募集中の案件数を取得 (Selenium + Beautiful Soup 4)

Posted at

Selenium と Beautiful Soup 4 のサンプルとして、CrowdWorks の募集中の案件数を取得してみました。
次のページをスクレイピングします。
システム開発 の仕事・依頼を探す
crowdworks_sep0201.png

crowdworks_development.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
#   crowdworks_development.py
#
#                   Sep/02/2018
#
# ------------------------------------------------------------------
import sys
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
#
from bs4 import BeautifulSoup
#
# ------------------------------------------------------------------
def file_write_proc(file_name,str_out):
    """Write a string to a file as UTF-8, replacing any existing content.

    Args:
        file_name: Path of the file to create or overwrite.
        str_out: Text to write.
    """
    # The context manager closes the handle even when write() raises.
    with open(file_name,mode='w',encoding='utf-8') as fp_out:
        fp_out.write(str_out)
#
# ------------------------------------------------------------------
def page_ready_wait_proc(driver, timeout=10, class_name='result_count'):
    """Block until an element with the given CSS class is present in the page.

    Args:
        driver: Selenium WebDriver whose current page is polled.
        timeout: Maximum time to wait, passed to WebDriverWait (default 10).
        class_name: Class name of the element to wait for
            (default 'result_count').

    Raises:
        Whatever WebDriverWait.until raises when the element does not
        appear in time (a TimeoutException according to the Selenium docs).
    """
    locator = (By.CLASS_NAME, class_name)
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located(locator)
        )
# ------------------------------------------------------------------
# Script body: load the job listing in headless Firefox, tick two filter
# checkboxes, save the resulting HTML, and report the number shown in the
# "result_count" element on stderr.
sys.stderr.write("*** 開始 ***\n")
url_target = "https://crowdworks.jp/public/jobs/group/development/u/professionals?order=new"
file_html = "tmp001.html"
#
# Run Firefox without opening a window.
options = Options()
options.add_argument('-headless')
# NOTE(review): executable_path / firefox_options are Selenium 3 keyword
# arguments; newer Selenium releases appear to expect options= and a
# Service object instead -- confirm against the installed version.
driver = Firefox(executable_path='/usr/bin/geckodriver', firefox_options=options)
try:
    driver.get(url_target)
    #
    # Tick both filter checkboxes (presumably they hide expired and
    # budget-pending jobs, judging by the element ids -- verify on the page).
    for idx in ("filter_hide_expired", "filter_hide_budget_pending"):
        driver.find_element(By.ID, idx).click()
    #
    # NOTE(review): 'result_count' may already be present before the clicks,
    # so this wait might not cover the refreshed list -- confirm.
    page_ready_wait_proc(driver)
    html = driver.page_source
finally:
    # Always shut the browser down, even when loading or waiting fails,
    # so no headless Firefox process is left behind.
    driver.quit()
#
# Keep a copy of the fetched page for inspection.
file_write_proc(file_html,html)
#
soup = BeautifulSoup(html, "html.parser")
ccx = soup.find(class_="result_count")
ccy = ccx.find("span") if ccx is not None else None
if ccy is None:
    # Fail with a clear message instead of an AttributeError on None.
    sys.stderr.write("*** error: result_count span not found ***\n")
    sys.exit(1)
count = ccy.get_text()
sys.stderr.write("count = " + count + "\n")
#
sys.stderr.write("*** 終了 ***\n")
# ------------------------------------------------------------------

実行結果

$ ./crowdworks_development.py
*** 開始 ***
count = 182
*** 終了 ***
1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0