今後の開発に向けて、全てのクラスの親となる基底クラスと、
JSON形式の設定ファイルを考えてみました。
基底クラス appbase.AppBase
#!python
# -*- coding: utf-8 -*-
import json
import logging.config
import os
import psycopg2
import psycopg2.extras
import sys
import time
import urllib.parse
import urllib.request
from selenium import webdriver # ex. pip install selenium==4.0.0a7
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
# Location of the shared JSON settings file, resolved relative to this module.
conf_src = os.path.join(
    os.path.dirname(__file__), '../../resources/app_py_conf.json')

# Read the settings inside a context manager so the file handle is closed
# deterministically, and decode explicitly as UTF-8: the file contains
# non-ASCII values, so relying on the platform default encoding is fragile.
with open(conf_src, encoding="utf-8") as conf_file:
    conf = json.load(conf_file)

# Retry policy used by AppBase.get_http_requests():
# number of attempts and seconds to sleep between attempts.
http_conf = {"retry_limit": 5, "retry_sleep": 5}

# Configure logging from the "logging" section of the settings and expose
# the root logger to the rest of the module.
logging.config.dictConfig(conf["logging"])
logger = logging.getLogger()

# Process-wide PostgreSQL connection, created lazily by AppBase.db_connect().
db_conn = None
class AppBase:
    """Base class that every application class derives from.

    Gives subclasses access to the module-level configuration and logger,
    a lazily created shared PostgreSQL connection, a Selenium browser
    factory and an HTTP GET helper with retries.
    """

    def __init__(self):
        pass

    def get_conf(self):
        """Return the module-level configuration dict."""
        return conf

    def get_logger(self):
        """Return the module-level logger."""
        return logger

    # NOTE: db_cursor() and db_connect() would arguably fit better in
    # util/db.py.
    def db_cursor(self, db_conn):
        """Return a cursor on ``db_conn`` created with psycopg2's DictCursor.

        Args:
            db_conn: An open psycopg2 connection (e.g. from db_connect()).
        """
        return db_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

    def db_connect(self):
        """Return the shared PostgreSQL connection, connecting on first use.

        The connection is cached in the module-level ``db_conn``. If the
        cached connection has been closed in the meantime, a new one is
        opened instead of handing back an unusable handle.

        Returns:
            An open psycopg2 connection built from ``conf["db"]``.
        """
        global db_conn
        # psycopg2 connections expose ``closed`` (0 while the connection is
        # open), so a closed cached connection triggers a reconnect.
        if db_conn is not None and not db_conn.closed:
            return db_conn

        db_conf = conf["db"]
        db_conn = psycopg2.connect(
            database=db_conf["db_name"],
            user=db_conf["db_user"],
            password=db_conf["db_pass"],
            host=db_conf["db_host"],
            port=db_conf["db_port"],
        )
        return db_conn

    def get_browser(self):
        """Create and return a Selenium browser (driver) for scraping.

        The driver executable, command-line options and implicit wait are
        read from ``conf["selenium"]``. Downloads are allowed and saved
        under ``/tmp``.

        Returns:
            A started Chrome WebDriver; the caller is responsible for
            calling ``quit()`` on it.
        """
        selenium_conf = conf["selenium"]
        browser_service = Service(
            executable_path=selenium_conf["browser_driver"])
        browser_opts = Options()
        for tmp_opt in selenium_conf["browser_options"]:
            browser_opts.add_argument(tmp_opt)

        # Options and Service both come from selenium.webdriver.chrome and
        # the configured driver is chromedriver, so the matching Chrome
        # driver class is used here.
        browser = webdriver.Chrome(service=browser_service,
                                   options=browser_opts)
        try:
            # Wait up to the configured number of seconds for elements to
            # appear.
            browser.implicitly_wait(selenium_conf["implicitly_wait"])

            # Register a custom "send_command" endpoint so that the
            # download behavior can be set.
            # refer to https://qiita.com/memakura/items/f80d2e2c59514cfc14c9
            browser.command_executor._commands["send_command"] = (
                "POST",
                '/session/$sessionId/chromium/send_command')
            params = {'cmd': 'Page.setDownloadBehavior',
                      'params': {'behavior': 'allow',
                                 'downloadPath': '/tmp'}}
            browser.execute("send_command", params=params)
        except Exception:
            # Do not leave a browser/driver process behind when the setup
            # fails half-way; the original error is re-raised unchanged.
            browser.quit()
            raise
        return browser

    # HTTP GET is used for scraping by many classes, so it lives here.
    def get_http_requests(self, req_url):
        """Fetch ``req_url`` with HTTP GET, retrying on failure.

        Up to ``http_conf["retry_limit"]`` attempts are made, sleeping
        ``http_conf["retry_sleep"]`` seconds between attempts.

        Args:
            req_url: URL string to fetch.

        Returns:
            The response body as bytes, or None when the server answers
            404 or every attempt fails.
        """
        # Imported locally so this method does not depend on the
        # file-level import list.
        from urllib.error import HTTPError

        retry_limit = http_conf["retry_limit"]
        for attempt in range(1, retry_limit + 1):
            try:
                # The context manager closes the response even if read()
                # raises.
                with urllib.request.urlopen(req_url) as http_res:
                    return http_res.read()
            except Exception as e:
                # A missing page will not appear by retrying. Check the
                # status code rather than the message text, which depends
                # on the server's reason phrase.
                if isinstance(e, HTTPError) and e.code == 404:
                    return None
                logger.warning(e)
                # Only announce a retry and sleep when another attempt
                # actually follows.
                if attempt < retry_limit:
                    logger.warning("retry " + req_url)
                    time.sleep(http_conf["retry_sleep"])

        logger.error("requests.get() " + req_url)
        return None
設定ファイル app_py_conf.json
{"common" : {
"shp2pgsql_cmd": "/usr/local/pgsql/bin/shp2pgsql",
"//shp2pgsql_cmd": "C:/PROGRA~1/PostgreSQL/14/bin/shp2pgsql.exe",
"google_map_api_key":"ないしょ",
"bulk_insert_size": 20,
"geoip2_mmdb" :
"/home/end0tknr/dev/SpringVue/src/main/resources/GeoLite2-City.mmdb"
},
"selenium" : {
"browser_driver": "/usr/local/bin/chromedriver",
"browser_options" : ["--headless",
"--enable-logging=False",
"--ignore-certificate-errors",
"--disable-extensions",
"--disable-print-preview",
"--download.default_directory=/tmp"
],
"implicitly_wait": 10
},
"db" : {
"db_host": "localhost",
"db_port": 5432,
"db_name": "ないしょ",
"db_user": "ないしょ",
"db_pass": "ないしょ"
},
"logging": {
"version": 1,
"disable_existing_loggers": false,
"root": {
"level": "INFO",
"handlers": ["logFileHandler"] },
"handlers": {
"logFileHandler": {
"class": "logging.FileHandler",
"level": "DEBUG",
"formatter": "logFileFormatter",
"filename": "/home/end0tknr/dev/SpringVuePy/log/app_py.log",
"mode": "a",
"encoding": "utf-8" }
},
"formatters": {
"logFileFormatter": {
"format":
"%(levelname)s\t%(asctime)s\t%(module)s\t%(funcName)s\t%(message)s"
}
}
}
}