More than 1 year has passed since last update.

【Python】よく使うライブラリ・コード

Python

Last updated at 2023-05-30 / Posted at 2020-12-15

設定ファイルとしてjsonを使用する

python

import json

# Load the JSON settings file.  The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until garbage
# collection.
with open('settings.json', encoding='utf8') as f:
    settings = json.load(f)
print(settings['key1'])

settings.json

{
    "key1" : "value1",
    "key2" : "value2"
}

txtファイル操作

入力データとしてtxtファイルを使用する（配列に格納）

python

# Read the whole text file, then store it as a list with one element per line.
with open('file.txt', mode='r', encoding='utf-8') as f:
    file = f.read()

files = file.split('\n')
if files[-1] == '':
    # A file that ends with a newline (or is empty) leaves an empty string
    # after the last '\n'; drop it so the list holds real lines only.
    files.pop()
print(files)

出力データとしてtxtファイルを使用する

python

# Open the output file in append mode and add one line of text to it.
with open(filepath, mode='a', encoding='utf-8') as f:
    line = '文字列\n'
    f.write(line)

ファイル操作

ファイルのコピー

python

import shutil

# Copy the file at path_src to path_dist.
shutil.copy2(src=path_src, dst=path_dist)

日付取得

python

import datetime

# Take the current local time and render it as a compact
# YYYYMMDDhhmmss string.
now = datetime.datetime.now()
now_disp = f'{now:%Y%m%d%H%M%S}'

ディレクトリのファイル一覧を取得（配列）

python

import os  # os.listdir below needs the os module

# List the names of the entries in the directory.
file_list = os.listdir(path=filepath)

# From the list obtained above, keep only the names that contain the
# given substring.
file_list_unique = [s for s in file_list if '指定文字列' in s]

webスクレイピング

terminal

pip install requests bs4

python

import requests
from bs4 import BeautifulSoup

# requests needs an absolute URL including the scheme; a bare host name
# such as 'website.com' raises requests.exceptions.MissingSchema.
url = 'https://website.com'
# Pass a timeout so an unresponsive server cannot block the script forever.
response = requests.get(url, timeout=10)
# Stop on HTTP error statuses instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
# Alternative parsers:
# soup = BeautifulSoup(response.content, 'lxml')
# soup = BeautifulSoup(response.content, 'html5lib')

# Example of specifying the source encoding explicitly
soup = BeautifulSoup(response.content, 'html.parser', from_encoding='shift-jis')


# Fetch a single element
print(soup.select_one('.css-selector').text)  # get the text
print(soup.select_one('.css-selector').attrs['href'])  # get an attribute value

# When there are several matches, fetch them with `select` and loop over them
htmls = soup.select('.css-selector')
for html in htmls:
    print(html.select_one('.css-selector').text)

ログイン（セッション保持）

python

# Use one Session for the login and the follow-up request so the session
# state from the login is kept.
session = requests.Session()

# Form fields posted to the login action (placeholder key/value).
data_login = {
    'propaty': 'value'
}
# Timeouts keep an unresponsive server from blocking the script forever.
session.post(url_action, data=data_login, timeout=10)

# Fetch the page behind the login with the same session and parse it.
result = session.get(url_result, timeout=10)
soup_result = BeautifulSoup(result.content, 'html.parser')

エクセル操作

openpyxl

terminal

pip install openpyxl

python

import openpyxl

# Load an existing workbook and pick a sheet by name
wb = openpyxl.load_workbook(filepath)
ws = wb['シート名']

# Create a new workbook (this rebinds wb and ws)
wb = openpyxl.Workbook()
ws = wb.active
ws.title = 'SheetName'


# Read the value of a specific cell (A1 is row=1, column=1)
cell_obj = ws.cell(row=1, column=1).value

# Write a value into a specific cell
ws.cell(row=1, column=1).value = '値'

# Read every value in one column.  ws['A'] returns just the cells of
# column A, so the whole sheet is not materialised to pick one column.
for cell_obj in ws['A']:
    print(cell_obj.value)

# Read every value in one row.  ws[1] returns the cells of row 1
# (worksheet row numbers are 1-based).
for cell_obj in ws[1]:
    print(cell_obj.value)

# Save the workbook
wb.save(filepath)

xlwt

import xlwt
 
# Create the workbook and add a sheet to it
book = xlwt.Workbook()
sheet1 = book.add_sheet('sheet1')

# write(row index, column index, value) -- both indices are zero-based

# Put a value in column A, row 2
sheet1.write(1, 0, 'hoge')

# Put values in column B, rows 1-2
for row_index in (0, 1):
    sheet1.write(row_index, 1, 10)

# Put a formula in column B, row 3
sheet1.write(2, 1, xlwt.Formula('sum(B1:B2)'))

# Save the workbook
book.save('hoge.xls')

ブラウザ自動操作

chromedriverをダウンロードし、プロジェクト直下に配置
https://chromedriver.chromium.org/downloads

terminal

pip install selenium pyperclip

python

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome import service as fs
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select
import pyperclip

# Driver setup: start Chrome through the chromedriver binary at DRIVER_PATH.
DRIVER_PATH = './chromedriver'
IMPLICITLY_INTERVAL = 10  # value passed to browser.implicitly_wait below
options = Options()
# Optional flags (disabled here): headless mode and a fixed window size
# options.add_argument('--headless')
# options.add_argument('--window-size=1600,900')
driver_service = fs.Service(executable_path=DRIVER_PATH)
browser = webdriver.Chrome(service=driver_service, options=options)
browser.implicitly_wait(IMPLICITLY_INTERVAL)

# ChromeDriver operations
## Open the URL given as the argument
browser.get('https://www.google.com/')

## Get the page HTML (also works with --headless)
html = browser.page_source
print(html)

## Ways to locate a single element
## (each assignment overwrites `element`; they are alternatives)
element = browser.find_element(By.CSS_SELECTOR, '.selector')
element = browser.find_element(By.CLASS_NAME, 'selector')
element = browser.find_element(By.TAG_NAME, 'div')

## Locating multiple elements
elements = browser.find_elements(By.CSS_SELECTOR, '.selector')

## Get an attribute value of the located element
## (the return value is discarded here; assign it to use it)
element.get_attribute('href')

## Click the located element
browser.find_element(By.CSS_SELECTOR, '.selector').click()

## Text input (send_keys is slow, so copy the string to the clipboard with pyperclip and paste it)
## NOTE(review): Ctrl+V is presumably the paste shortcut only on Windows/Linux; macOS may need Keys.COMMAND -- confirm
pyperclip.copy('入力したい文字列')
browser.find_element(By.CSS_SELECTOR, '.selector').send_keys(Keys.CONTROL, 'v')

## Choosing from a select box
selectbox = browser.find_element(By.CSS_SELECTOR, '.selector')
select = Select(selectbox)

### Ways to select an option
select.select_by_index(2)  # select the 3rd option tag
select.select_by_index(len(select.options)-1)  # select the last option tag
select.select_by_value('3') # select the option tag whose value is "3"
select.select_by_visible_text('3番目') # select the option tag whose displayed text is "3番目"

## Run JavaScript (example: set the display property of the selected element to none)
browser.execute_script('document.querySelector(".css-selector").style.display="none";')

## Quit ChromeDriver
browser.quit()

その他セレクトボックス参考（ https://yuki.world/selenium-select/ ）

実行ファイル化する

terminal

pip install pyinstaller

terminal

pyinstaller filename.py

pyinstaller filename.py --onefile # bundle everything into a single .exe file
pyinstaller filename.py --noconsole # do not show a console when the .exe runs
pyinstaller filename.py --onefile --noconsole # both of the above

定期実行

terminal

pip install schedule

python

import schedule
import time

# Job function to run
def job():
    """Print a fixed message showing that the job ran."""
    message = "job実行"
    print(message)


# The registrations below are all added to the same scheduler; keep only
# the ones you need.

# Register job to run every 1 minute
schedule.every(1).minutes.do(job)

# Register job to run every 1 hour
schedule.every(1).hours.do(job)

# Register job to run every day at 11:00 AM
schedule.every().day.at("11:00").do(job)

# Register job to run on Sundays
schedule.every().sunday.do(job)

# Register job to run on Wednesdays at 13:15
schedule.every().wednesday.at("13:15").do(job)

# Watch the registered jobs: once a second, run any job that is due.
# This loop never exits.
while True:
    schedule.run_pending()
    time.sleep(1)

ロリポップサーバーでPythonを使う

ロリポップで「スタンダードプラン」以上を契約する。（SSH接続が必要になるため）
管理画面メニュー「サーバーの管理・設定」内の「SSH」を選択し、SSHを有効にする。
表示された接続情報を使い、SSH接続を行う。（RLogin等）

pip導入

terminal

# Check the Python version
python3 --version

# Install pip: download the bootstrap script, then run it as a per-user install
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py --user

PATHを通す

terminal

# パスを通す
pwd
# → /home/users/番号/ユーザー名
# 次に、PATHを通すために .bash_profile を作成します。
vi .bash_profile

「.bash_profile」を作成

vi操作：「i」で挿入モード、「:wq」で書き込み終了。

.bash_profile

PATH=$PATH:/home/users/番号/ユーザー名/.local/bin
export PATH

terminal

source .bash_profile

確認＆インストール

terminal

pip3 list

pip3 install requests

cron（定期実行）する場合

以下の２行を、プログラムの先頭行に入れる

python

#!/usr/local/bin/python3.7
# coding:utf-8

上記のpython3.7はその時のバージョンによるので、
SSHからcd /usr/local/bin/で階層を移動してバージョンを確認して行う。

また、cronから実行するとカレントディレクトリがスクリプトの配置場所とは限らないため、プログラム内のパスは相対パスではなく、ルートからの絶対パスで記述すること。
ルートパス例：/home/users/1/itigo.jp-balita2981/web/file.txt

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up