CentOS6 でキャプチャ取得サーバーを作成するために、
Selenium を使ってみた!
まず、CUI 上で Firefox を動かせるように仮想ディスプレイの設定を行う
仮想ディスプレイは Xvfb を使用する
# yum -y install Xvfb firefox
ついでに firefox もインストールする
次に D-Bus 用のマシン UUID(machine-id)を生成する
# dbus-uuidgen > /var/lib/dbus/machine-id
Python から Selenium を操作する準備
# cd /usr/local/src
# wget http://peak.telecommunity.com/dist/ez_setup.py
# python ez_setup.py
# wget https://raw.github.com/pypa/pip/master/contrib/get-pip.py
# python get-pip.py
# pip install selenium
Xvfb を設定し、自動起動できるようにする
rcスクリプトを作成
# vi /etc/init.d/xvfb
#!/bin/bash
#
# chkconfig: - 91 35
# description: Xvfb
#
# SysV init script for Xvfb. It handles the start, stop, restart and status
# actions and tracks the running server through a PID file.
# Display address exported into the environment of this script.
export DISPLAY="localhost:1.0"
# Source function library.
# (presumably the origin of the `action` helper used by start/stop)
. /etc/init.d/functions
# Service name shown in the messages printed by this script.
prog=$"Xvfb"
# Xvfb program
XVFB=/usr/bin/Xvfb
# Arguments passed to Xvfb when it is launched; per the Xvfb option syntax:
# display :1, screen 0 at 1024x768 with depth 8.
STATUS=":1 -screen 0 1024x768x8"
# File in which start() stores the PID of the background Xvfb process.
pidf=/var/run/xvfb.pid
# Start Xvfb in the background and record its PID in $pidf.
#
# A PID file that points at a live process means the service is already
# running: this is reported and nothing else happens (returns 0, as before).
# A stale PID file (its process no longer exists) is removed so that the
# service can be started again after a crash.
# Returns 1 when Xvfb exits right after being launched.
start() {
    local pid
    if [ -e "$pidf" ]; then
        pid=$(cat "$pidf" 2>/dev/null)
        if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
            action $"Starting $prog: " /bin/false
            echo "$prog already started"
            return 0
        fi
        # Stale PID file: the recorded process is gone.
        rm -f "$pidf"
    fi
    # $STATUS is intentionally unquoted: it holds several arguments.
    $XVFB $STATUS > /dev/null 2>&1 &
    pid=$!
    # Give Xvfb a moment to fail (bad option, display already in use, ...)
    # so that the reported result reflects what actually happened.
    sleep 1
    if [ -d "/proc/$pid" ]; then
        echo "$pid" > "$pidf"
        action $"Starting $prog: " /bin/true
    else
        action $"Starting $prog: " /bin/false
        return 1
    fi
}
# Stop the Xvfb process recorded in $pidf and remove the PID file.
#
# When the PID file is missing, "not running" is reported (returns 0, as
# before). When the PID file exists but its process is already gone, the
# stale file is simply removed so that a later start can succeed.
# Returns 1 only when a live process could not be signalled; the PID file is
# kept in that case because the process is still running.
stop() {
    local pid
    if [ ! -e "$pidf" ]; then
        action $"Stopping $prog: " /bin/false
        echo "$prog not running"
        return 0
    fi
    pid=$(cat "$pidf" 2>/dev/null)
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        if ! kill "$pid"; then
            action $"Stopping $prog: " /bin/false
            return 1
        fi
    fi
    rm -f "$pidf"
    action $"Stopping $prog: " /bin/true
}
# Report whether Xvfb is running, based on $pidf and on whether the recorded
# process still exists.
#
# Return codes follow the LSB convention for the status action:
#   0  running
#   1  PID file exists but the process is gone
#   3  not running (no PID file)
status() {
    local pid
    if [ ! -e "$pidf" ]; then
        echo "$prog not running"
        return 3
    fi
    pid=$(cat "$pidf" 2>/dev/null)
    # /proc is used instead of `kill -0` so that the check also works for
    # users who are not allowed to signal the process.
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        echo "$prog (pid $pid) を実行中..."
        return 0
    fi
    echo "$prog dead but pid file exists"
    return 1
}
# Dispatch on the requested action. The script exits with the status of the
# action that was run, so callers can detect a failed start or stop.
# An unknown action prints the usage line and exits with 1.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        stop
        sleep 1
        start
        ;;
    status)
        status
        ;;
    *)
        echo $"Usage: $0 {start|stop|restart|status}"
        exit 1
        ;;
esac
# $? here is the status of the last command executed inside the case.
exit $?
作成した rcスクリプトをサービス登録
# chmod 755 /etc/init.d/xvfb
# chkconfig --add xvfb
# chkconfig xvfb on
# /etc/init.d/xvfb start
仮想ディスプレイを環境変数にセット
# vi /etc/profile
以下を追記
export DISPLAY="localhost:1.0"
最後にキャプチャ取得プログラムを作成
$ vi cap.py
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re, sys, os
from PIL import Image
# Read the three required positional arguments: the URL to open, the path
# the screenshot is saved to, and the device type ("sp" selects the
# smartphone settings; anything else uses the desktop settings).
try:
    URL = sys.argv[1]
    FILE = sys.argv[2]
    DEVICE = sys.argv[3]
except IndexError:
    # Fewer than three arguments were given. Only IndexError is caught so
    # that unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
    print('Not argv')
    sys.exit(1)
class Cap(unittest.TestCase):
    """Open URL in Firefox and save a screenshot of the page to FILE.

    The module-level values URL, FILE and DEVICE (taken from the command
    line) control what is captured: DEVICE == "sp" selects an iPhone user
    agent and a 320x480 window, any other value selects a desktop user agent.
    """

    def setUp(self):
        """Build the Firefox profile and start the browser."""
        profile = webdriver.FirefoxProfile()
        # Save downloads into the screenshot directory without prompting.
        profile.set_preference("browser.download.useDownloadDir", True)
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.download.dir", os.path.dirname(FILE))
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain, application/vnd.ms-excel, text/csv, text/comma-separated-values, application/octet-stream")
        # Turn the browser caches off.
        profile.set_preference("browser.cache.disk.enable", False)
        profile.set_preference("browser.cache.memory.enable", False)
        profile.set_preference("browser.cache.offline.enable", False)
        profile.set_preference("network.http.use-cache", False)
        if DEVICE == "sp":
            useragent = "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4"
        else:
            useragent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)"
        profile.set_preference("general.useragent.override", useragent)
        self.driver = webdriver.Firefox(firefox_profile=profile)
        # NOTE(review): implicitly_wait() takes seconds, so this is roughly
        # 28 hours; it looks like milliseconds were intended. Left unchanged
        # because it only affects element lookups - confirm and lower it.
        self.driver.implicitly_wait(100000)
        self.base_url = URL
        self.verificationErrors = []
        self.accept_next_alert = True
        if DEVICE == "sp":
            self.driver.set_window_size(320, 480)

    def test_cap(self):
        """Load URL, wait three seconds, then write the screenshot to FILE."""
        driver = self.driver
        driver.get(URL)
        # Fixed pause before the capture (presumably to let the page render).
        time.sleep(3)
        driver.save_screenshot(FILE)
        # Optional post-processing, disabled by default: crop the capture to
        # at most 800 px in height. Enable and adjust as needed.
        # slice image
        # org_im = Image.open(FILE)
        # size = org_im.size
        # new_im = None
        # height = 800 if size[1] > 800 else size[1]
        # new_im = org_im.crop((0, 0, size[0], height))
        # if DEVICE == "sp":
        #     new_im = org_im.crop((0, 0, size[0], height))
        # else:
        #     new_im = org_im.crop((0, 0, size[0], height))
        # new_im.save(FILE, "PNG")
        # os.chmod(FILE, 0666)

    def is_element_present(self, how, what):
        """Return True if an element matching (how, what) can be found."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if the driver can switch to an alert."""
        try:
            self.driver.switch_to_alert()
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when self.accept_next_alert is true and
        dismissed otherwise; the flag is reset to True afterwards in either
        case.
        """
        try:
            alert = self.driver.switch_to_alert()
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the browser and fail if verification errors were recorded."""
        self.driver.quit()
        self.assertEqual([], self.verificationErrors)
# Run the capture when executed as a script. unittest.main() is given only
# the program name so that the URL / path / device arguments are not parsed
# as unittest options or test names.
if __name__ == "__main__":
    program_name_only = sys.argv[:1]
    unittest.main(argv=program_name_only)
使い方
$ python cap.py [URL] [保存先パス] [デバイス(pc or sp)]
取得したキャプチャを加工する場合は、
cap.py 内の「# slice image」以下にあるコメントアウトされたコードを有効にして調整する
※ キャプチャ内の日本語が文字化けしている場合は、日本語フォントをインストールする
# yum install fonts-ja* ttfonts-ja*
犬の画像を大量に集め中♪w