More than 5 years have passed since last update.

Selenium でキャプチャ取得マシンを作る

Last updated at 2015-07-23 / Posted at 2015-07-23

CentOS6 でキャプチャ取得サーバーを作成するために、
Selenium を使ってみた！

まず、CUI 上で Firefox を動かせるように仮想ディスプレイの設定を行う
仮想ディスプレイは Xvfb を使用する

# yum -y install Xvfb firefox

ついでに firefox もインストールする

次に D-Bus の machine-id (UUID) を生成する

# dbus-uuidgen > /var/lib/dbus/machine-id

Python から Selenium を操作する準備

# cd /usr/local/src
# wget http://peak.telecommunity.com/dist/ez_setup.py
# python ez_setup.py
# wget https://raw.github.com/pypa/pip/master/contrib/get-pip.py
# python get-pip.py
# pip install selenium

Xvfb の設定と自動起動できるようにする
rcスクリプトを作成

# vi /etc/init.d/xvfb

#!/bin/bash
#
# chkconfig: - 91 35
# description: Xvfb
#
# SysV init script that runs Xvfb as display :1 and tracks it through a
# pid file. Supports: start, stop, restart, status.

# Display exported into this script's environment (matches the ":1" display
# number that Xvfb is started with below).
export DISPLAY="localhost:1.0"

# Source function library.
# (Presumably the source of the `action` helper used by start/stop.)
. /etc/init.d/functions

prog=$"Xvfb"

# Xvfb program
XVFB=/usr/bin/Xvfb
# Arguments handed to Xvfb: the display number followed by the geometry of
# screen 0 (WIDTHxHEIGHTxDEPTH).
# NOTE(review): depth 8 limits the colour range of captured screenshots -
# confirm this is intended.
STATUS=":1 -screen 0 1024x768x8"
# File in which start() records the PID of the background Xvfb process.
pidf=/var/run/xvfb.pid

# Launch Xvfb in the background and record its PID in $pidf.
# Refuses to start a second instance while the recorded PID is still alive.
start() {
    # A pid file on its own does not prove the server is running: if Xvfb
    # died without stop() being called, the file is stale and must not block
    # a fresh start.
    if [ -e "$pidf" ] && kill -0 "$(cat "$pidf")" > /dev/null 2>&1;
    then
        action $"Starting $prog: " /bin/false
        echo "$prog already started"
    else
        rm -f "$pidf"
        # $STATUS is intentionally unquoted so that it splits into the
        # separate arguments Xvfb expects.
        $XVFB $STATUS > /dev/null 2>&1 &
        echo $! > "$pidf"
        # Report success only after the process has actually been launched.
        action $"Starting $prog: " /bin/true
    fi
}

# Terminate the Xvfb process recorded in $pidf and remove the pid file.
stop() {
    if [ -e "$pidf" ];
    then
        pid=$(cat "$pidf")
        # Report the real outcome of kill instead of announcing success
        # before anything has been attempted.
        if [ -n "$pid" ] && kill "$pid" > /dev/null 2>&1;
        then
            action $"Stopping $prog: " /bin/true
        else
            action $"Stopping $prog: " /bin/false
            echo "$prog not running"
        fi
        # Always drop the pid file: when kill fails the process is already
        # gone (or the file was empty), and leaving the file behind would
        # make every later start() believe the server is still running.
        rm -f "$pidf"
    else
        action $"Stopping $prog: " /bin/false
        echo "$prog not running"
    fi
}

# Print whether the Xvfb process recorded in $pidf is currently running.
status() {
    pid=""
    if [ -e "$pidf" ];
    then
        pid=$(cat "$pidf")
    fi

    # Confirm that the recorded PID still exists; a leftover pid file alone
    # must not be reported as a running server. `ps -p` is used rather than
    # `kill -0` so the check also works for unprivileged callers.
    if [ -n "$pid" ] && ps -p "$pid" > /dev/null 2>&1;
    then
        echo "$prog (pid $pid) を実行中..."
    else
        echo "$prog not running"
    fi
}

# Dispatch on the first command-line argument; anything unrecognised prints
# the usage line and exits with status 1.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        # Short pause between stopping and starting again.
        sleep 1
        start
        ;;
    status)
        status
        ;;
    *)
        echo $"Usage: $0 {start|stop|restart|status}"
        exit 1
        ;;
esac

exit 0

作成した rcスクリプトをサービス登録

# chmod 755 /etc/init.d/xvfb
# chkconfig --add xvfb
# chkconfig xvfb on
# /etc/init.d/xvfb start

仮想ディスプレイを環境変数にセット

# vi /etc/profile

以下を追記

export DISPLAY="localhost:1.0"

最後にキャプチャ取得プログラムを作成

$ vi cap.py

# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re, sys, os
from PIL import Image

# Required positional arguments: the page to capture, the path the screenshot
# is written to, and the device type ("sp" selects the smartphone settings;
# any other value gets the desktop settings).
try:
    URL = sys.argv[1]
    FILE = sys.argv[2]
    DEVICE = sys.argv[3]
except IndexError:
    # Only a missing argument is expected here; a bare ``except`` would also
    # hide unrelated failures such as KeyboardInterrupt.
    print('Not argv')
    sys.exit(1)

class Cap(unittest.TestCase):
    """Open ``URL`` in Firefox through Selenium and save a screenshot to ``FILE``.

    Written as a unittest case so that ``setUp``/``tearDown`` manage the
    browser around the single ``test_cap`` step. The module-level ``URL``,
    ``FILE`` and ``DEVICE`` values drive the behaviour.
    """

    def setUp(self):
        """Build the Firefox profile, start the browser and size its window."""
        profile = webdriver.FirefoxProfile()

        # Download-related preferences; the download directory is the
        # directory part of the screenshot path.
        profile.set_preference("browser.download.useDownloadDir", True)
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.download.dir", os.path.dirname(FILE))
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain, application/vnd.ms-excel, text/csv, text/comma-separated-values, application/octet-stream")

        # Cache-related preferences, all switched off.
        profile.set_preference("browser.cache.disk.enable", False)
        profile.set_preference("browser.cache.memory.enable", False)
        profile.set_preference("browser.cache.offline.enable", False)
        profile.set_preference("network.http.use-cache", False)

        # Choose the User-Agent by device type: "sp" gets an iPhone
        # (iOS 8 Safari) string, everything else a desktop MSIE 10 string.
        if DEVICE == "sp":
            useragent = "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4"
        else:
            useragent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)"

        profile.set_preference("general.useragent.override", useragent)

        self.driver = webdriver.Firefox(firefox_profile=profile)
        # NOTE(review): very large implicit-wait value - confirm it is
        # intended; no element lookup happens in test_cap.
        self.driver.implicitly_wait(100000)
        self.base_url = URL
        self.verificationErrors = []
        self.accept_next_alert = True

        # Smartphone captures use a 320x480 window.
        if DEVICE == "sp":
            self.driver.set_window_size(320, 480)

    def test_cap(self):
        """Load ``URL``, pause for three seconds, then write the screenshot to ``FILE``."""
        self.driver.get(URL)
        # Fixed pause before capturing (presumably to let the page finish
        # rendering).
        time.sleep(3)
        self.driver.save_screenshot(FILE)

        # Optional post-processing, left disabled; enable and adapt it to
        # crop or otherwise edit the captured image.
        # slice image
        # org_im = Image.open(FILE)
        # size   = org_im.size
        # new_im = None
        # height = 800 if size[1] > 800 else size[1]
        # new_im = org_im.crop((0, 0, size[0], height))
        # if DEVICE == "sp":
        #     new_im = org_im.crop((0, 0, size[0], height))
        # else:
        #     new_im = org_im.crop((0, 0, size[0], height))

        # new_im.save(FILE, "PNG")
        # os.chmod(FILE, 0o666)

    def is_element_present(self, how, what):
        """Return True if an element located by (``how``, ``what``) is found, else False."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False

        return True

    def is_alert_present(self):
        """Return True if switching to an alert succeeds, else False."""
        try:
            self.driver.switch_to_alert()
        except NoAlertPresentException:
            return False

        return True

    def close_alert_and_get_its_text(self):
        """Close the current alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards in every
        case.
        """
        try:
            alert = self.driver.switch_to_alert()
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()

            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Quit the browser, then fail if any verification errors were recorded."""
        self.driver.quit()
        self.assertEqual([], self.verificationErrors)

if __name__ == "__main__":
    # Pass unittest nothing but the program name: the remaining command-line
    # arguments belong to this script and must not be read as test names.
    runner_argv = sys.argv[:1]
    unittest.main(argv=runner_argv)

使い方

$ python cap.py [URL] [保存先パス] [デバイス(pc or sp)]

取得したキャプチャを加工する場合は、
slice image 以下でゴニョゴニョする

※ 日本語が文字化けしている場合は、日本語フォントをインストールする

# yum install fonts-ja* ttfonts-ja*

犬の画像を大量に集め中♪w

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up