1
1

More than 1 year has passed since last update.

【メモ】Dockerでseleniumスクレイピング始め方

Last updated at Posted at 2022-06-16
/Dockerfile
# Build image based on the lambci Lambda Python 3.8 build environment.
FROM lambci/lambda:build-python3.8

# chromedriver and headless-chromium are unpacked into /opt/python/, which is
# where the application and the tests look for them.
WORKDIR /opt/python/

# -f makes curl fail on an HTTP error instead of saving the error page as the
# archive. The zip files and yum metadata are removed in the same layer so they
# do not end up in the image.
RUN yum install -y unzip && \
    curl -fSL https://chromedriver.storage.googleapis.com/2.43/chromedriver_linux64.zip -o chromedriver.zip && \
    curl -fSL https://github.com/adieuadieu/serverless-chrome/releases/download/v1.0.0-55/stable-headless-chromium-amazonlinux-2017-03.zip -o headless-chromium.zip && \
    unzip chromedriver.zip && \
    unzip headless-chromium.zip && \
    rm -f chromedriver.zip headless-chromium.zip && \
    yum clean all

WORKDIR /var/task

# requests is pinned to the same version setup.py declares in install_requires.
# NOTE(review): boto3 and chalice are still unpinned; pin them once the
# intended versions are known.
RUN pip install --no-cache-dir boto3 chalice requests==2.27.1 selenium==3.141.0

# Application sources are copied last so that editing them does not invalidate
# the dependency layers above.
COPY PROJECTNAME /var/task/PROJECTNAME
COPY tests /var/task/tests
/docker-compose.yml
version: "3"
services:
  PROJECTNAME:
    build:
      context: .
      dockerfile: ./Dockerfile
    # Runs the test suite declared in setup.py (test_suite='tests').
    command: python ./PROJECTNAME/setup.py test
    environment:
      # No value is given here, so Compose forwards the host's value if set.
      - PYTHONUNBUFFERED
    volumes:
      # Bind-mount the working tree over the copies baked into the image so
      # edits are picked up without rebuilding.
      - ./PROJECTNAME:/var/task/PROJECTNAME
      - ./tests:/var/task/tests
/PROJECTNAME/__init__.py
/PROJECTNAME/setup.py
"""Packaging metadata for PROJECTNAME; also declares the test suite location."""
from setuptools import setup, find_packages

setup(
    name='PROJECTNAME',
    version='0.0.1',
    # Test packages are not part of the distributed package.
    packages=find_packages(
        exclude=['*.tests', '*.tests.*', 'tests.*', 'tests']
    ),
    install_requires=[
        'requests==2.27.1',
        # The package imports selenium at module level, so it is a runtime
        # dependency; the version matches the one installed by the Dockerfile.
        'selenium==3.141.0',
    ],
    test_suite='tests'
)
/PROJECTNAME/PROJECTNAME.py
from datetime import date, timedelta
import os
import time
from typing import Dict, List, Tuple

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait

# User-Agent string sent by the browser instead of the headless default.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/98.0.4758.80 Safari/537.36'
)

# Chrome driver options shared by every WebDriver session in this project.
options = webdriver.ChromeOptions()
options.binary_location = '/opt/python/headless-chromium'
# Flags are applied in the order listed.
for _chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--single-process',
    '--lang=ja-JP',
    f'user-agent={USER_AGENT}',
):
    options.add_argument(_chrome_flag)

/tests/__init__.py
/tests/test_PROJECTNAME.py
from datetime import date
from selenium import webdriver
from PROJECTNAME import PROJECTNAME
from unittest import TestCase

# テスト例
# self.assertIsInstance(r, str)


class SeleniumProject(TestCase):
    """Browser tests that run against a real headless Chrome session."""

    def setUp(self):
        """Set the login credentials and start a new Chrome session."""
        # Credentials are left blank here; fill them in before running.
        self.username = ''
        self.password = ''
        self.driver = webdriver.Chrome(
            executable_path='/opt/python/chromedriver',
            options=PROJECTNAME.options,
        )

    def tearDown(self):
        """Shut down the Chrome session started in setUp."""
        self.driver.quit()

    def test_login(self):
        """PROJECTNAME.login should return a truthy value."""
        logged_in = PROJECTNAME.login(
            self.driver, self.username, self.password
        )
        self.assertTrue(logged_in)
1
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1