More than 1 year has passed since last update.

Speaker Deckのスライド更新はPythonにやらせよう

Last updated at 2023-07-16 / Posted at 2023-07-15

やりたいこと

Speaker DeckにPDFをアップロードしてから、何度もﾁﾏﾁﾏと更新してしまいますが、そのたびに手作業で差し替えるのは面倒です
Pythonで自動化してみようと思いました

先に感想

けっこう面倒です、というかかなり面倒です。。。

注意

練習でアップロードを何十回も繰り返しているとスライドの反映が遅くなったりしていました(ペナルティかも？)。ご注意ください。

やっていること

デモ

30秒くらいかかってますね。

前提条件

先にスライドを登録していること
- user_id、スライドのURLが分かっていること。(注:ここでは、「スライドのURL」は全体のURLの一部を指しています→ https://speakerdeck.com/<user_id>/<スライドのURL>)
- (実際はドラフト状態でも更新できます)
SpeakerDeckのIDとパスワードでログインすること
- 他のやり方も追加はできるけどここではやらない
PDFを保存すると、それをトリガーに処理を開始する
- PDFのFullPathを登録する
Python、selenium, watchdogがインストールできること
- https://pypi.org/project/watchdog/
- https://selenium-python.readthedocs.io/installation.html

Seleniumインストールで参考にしたブログ
chromedriver_binaryを使っています。

コード

My環境
Python 3.9.13
watchdog 3.0.0
selenium 4.4.3
(バージョン依存は特にないはず)

selenium用のコード

以下は適宜変更が必要です。

変数	種類	説明
SLIDE_URL	ハードコード	`https://speakerdeck.com/<user_id>/<スライドのURL>`のスライドのURLの部分
FILE_PATH	ハードコード	WatchdogがPDFを検知するためのフルパス
SPEAKER_DECK_USERNAME	環境変数	SpeakerDeckにSeleniumからログインするため
SPEAKER_DECK_PASSWORD	環境変数	SpeakerDeckにSeleniumからログインするため
SPEAKER_DECK_ID	環境変数	`https://speakerdeck.com/<user_id>/<スライドのURL>`のuser_idの部分(編集ページのURLを組み立てるため)

driver.set_window_positionとdriver.set_window_sizeは、Seleniumが開くウィンドウの位置とサイズを調整するためのもので、デフォルトではコメントアウトしています。

upload_to_speakerdeck.py

import logging
import os
import sys
import time

import chromedriver_binary
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Deck slug: the last path segment of https://speakerdeck.com/<user_id>/<SLIDE_URL>
SLIDE_URL = "test-auto-upload"
# Full path of the PDF to upload; watchdog_monitor_local.py imports this value
FILE_PATH = "/home/user/Downloads/test-auto-upload.pdf" 

# Root logger shared by every function in this module; setup_logger() attaches its handler
logger = logging.getLogger()


def setup_logger():
    """
    Configure the module-level logger to write INFO-and-above records to stdout

    Each record is rendered as "<timestamp> - <level> - <message>".
    """
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )

    logger.setLevel(logging.INFO)
    logger.addHandler(stdout_handler)


def initialize_webdriver(url: str) -> webdriver.Chrome:
    """
    Initialize Chrome webdriver and open the specified URL

    Args:
        url: Address loaded into the new browser session.

    Returns:
        The Chrome driver, still open; nothing in this function closes it.
    """
    driver = webdriver.Chrome()
    # Optional window geometry; uncomment to fix the size/position of the Selenium window
    # driver.set_window_size(1080, 800)  # option
    # driver.set_window_position(0, 0)  # option
    driver.get(url)
    return driver


def upload_file(file_path: str):
    """
    Function to upload the file to SpeakerDeck

    Signs in, opens the edit page of the deck identified by ``SLIDE_URL``,
    re-uploads ``file_path`` and saves the deck. The browser session is always
    closed afterwards, even when a step raises, so repeated calls do not leave
    Chrome sessions behind.

    Args:
        file_path: Full path of the PDF to upload.

    Raises:
        KeyError: If the SPEAKER_DECK_ID environment variable is not set
            (raised before a browser is started). ``sign_in`` raises the same
            for its own credential variables.
    """
    logger.info(f"Starting upload for {file_path}")

    # Read the account id first so a missing variable fails before Chrome starts
    speaker_deck_id = os.environ["SPEAKER_DECK_ID"]

    driver = initialize_webdriver("https://speakerdeck.com/signin/")
    try:
        sign_in(driver)

        # Edit page
        driver.get(f"https://speakerdeck.com/{speaker_deck_id}/{SLIDE_URL}/edit")
        upload_pdf(driver, file_path)
        save_changes(driver)
    finally:
        # End the WebDriver session whether or not the steps above succeeded
        driver.quit()

    logger.info(f"Finished upload for {file_path}")


def sign_in(driver: webdriver.Chrome):
    """
    Fill in and submit the SpeakerDeck sign-in form loaded in ``driver``

    The credentials come from the SPEAKER_DECK_USERNAME and
    SPEAKER_DECK_PASSWORD environment variables; a missing variable raises
    KeyError. Every element lookup waits up to 30 seconds.
    """
    username = os.environ["SPEAKER_DECK_USERNAME"]
    password = os.environ["SPEAKER_DECK_PASSWORD"]
    wait = WebDriverWait(driver, 30)

    # (element id, text to type, log line) for each input, in form order
    form_fields = (
        ("user_username", username, "Completed entering username into the field."),
        ("user_password", password, "Completed entering password into the field."),
    )
    for element_id, value, done_message in form_fields:
        field = wait.until(EC.presence_of_element_located((By.ID, element_id)))
        field.send_keys(value)
        logger.info(done_message)

    # Submit once the sign-in button is clickable
    wait.until(EC.element_to_be_clickable((By.NAME, "commit"))).click()
    logger.info("Completed finding and clicking sign-in button.")


def upload_pdf(driver: webdriver.Chrome, file_path_local: str):
    """
    Re-upload a PDF through the deck edit page loaded in ``driver``

    Clicks the reupload button, sends ``file_path_local`` to the file input and
    waits up to 30 seconds for the "processed" message. A timeout on that
    message is only logged; the function then sleeps 5 seconds and returns.
    """
    wait = WebDriverWait(driver, 30)

    # Open the reupload UI
    reupload_locator = (
        By.XPATH,
        "//*[@class='btn btn-outline-primary js-upload-reprocess']",
    )
    wait.until(EC.element_to_be_clickable(reupload_locator)).click()
    logger.info("Completed finding and clicking reupload button.")

    # Hand the local path to the file input element
    upload_field = wait.until(EC.presence_of_element_located((By.ID, "upload-field")))
    upload_field.send_keys(file_path_local)
    logger.info("Completed finding and sending file to file input.")

    # Wait for the confirmation text; carry on regardless of the outcome
    processed_locator = (
        By.XPATH,
        "//*[contains(text(), 'Your deck has been processed')]",
    )
    try:
        wait.until(EC.presence_of_element_located(processed_locator))
    except TimeoutException:
        logger.info("Waiting for success message took too much time!")
    else:
        logger.info("Completed upload.")

    time.sleep(5)


def save_changes(driver: webdriver.Chrome):
    """
    Click "Save this deck" on the edit page and wait for the confirmation

    Timeouts while waiting for the button or for the success message are
    logged and otherwise ignored. A missing button at the initial lookup is
    not caught here.
    """
    save_button_xpath = "//button[contains(text(), 'Save this deck')]"
    wait = WebDriverWait(driver, 30)

    # Bring the save button into the viewport, then pause before clicking it
    driver.execute_script(
        "arguments[0].scrollIntoView();",
        driver.find_element(By.XPATH, save_button_xpath),
    )
    time.sleep(5)

    # Click save button
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, save_button_xpath))).click()
    except TimeoutException:
        logger.info("Waiting for save button took too much time!")
    else:
        logger.info("Completed finding and clicking save button.")

    # Check for update success message
    updated_locator = (
        By.XPATH,
        "//*[contains(text(), 'Presentation updated successfully.')]",
    )
    try:
        wait.until(EC.presence_of_element_located(updated_locator))
    except TimeoutException:
        logger.info("Waiting for update success message took too much time!")
    else:
        logger.info("Completed update process.")


if __name__ == "__main__":
    # Direct execution (no watchdog): configure logging, then upload FILE_PATH once
    setup_logger()
    upload_file(FILE_PATH)

watchdogを起動するためのコード

これを起動して、新しいPDFが来るのを待ち構えることになります。このコードは変更不要です。

watchdog_monitor_local.py

import logging
import os
import sys
import threading
import time

from upload_to_speakerdeck import FILE_PATH  # Imported from the second script
from upload_to_speakerdeck import upload_file  # Imported from the second script
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

# Directory containing FILE_PATH; this is the folder the observer watches
FILE_DIR = os.path.dirname(FILE_PATH)
# File name of FILE_PATH without its extension; used as a prefix match on changed PDFs
FILE_NAME = os.path.splitext(os.path.basename(FILE_PATH))[0]
DEBOUNCE_SECONDS = 5  # Ignore multiple events closer together than this

# Define logger globally
logger = logging.getLogger()


def setup_logger():
    """
    Attach a stdout handler to the global logger and set its level to INFO

    Records are formatted as "<timestamp> - <level> - <message>".
    """
    log_format = logging.Formatter(
        "%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(log_format)

    logger.setLevel(logging.INFO)
    logger.addHandler(console)


# Lock held around upload_file() in PDFHandler.process so uploads never overlap
lock = threading.Lock()


class PDFHandler(FileSystemEventHandler):
    """
    Custom event handler class for PDF file changes
    Inherits from the watchdog module's FileSystemEventHandler class

    A modification event triggers ``upload_file`` when the changed file is a
    ``.pdf`` whose name starts with ``FILE_NAME`` and more than
    ``DEBOUNCE_SECONDS`` have passed since the previous upload started or
    finished. Saving the same PDF again later therefore uploads it again.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # time.time() of the most recent upload start/finish; 0 means "never"
        self.last_modified_time = 0
        # Path of the most recently uploaded file (informational only)
        self.last_file_path = ""
        print(f"Warting for new {FILE_NAME} will be save to {FILE_DIR}..")

    def on_modified(self, event):
        """
        Event handler for file modified events
        """
        self.process(event)

    def process(self, event):
        """
        Processes the event of a file modification

        Ignores directories, non-PDF files, files whose name does not start
        with ``FILE_NAME``, and events inside the debounce window.
        """
        if event.is_directory or not event.src_path.endswith(".pdf"):
            return
        if not os.path.basename(event.src_path).startswith(FILE_NAME):
            return

        # Debounce on time only. Also requiring a *different* path than the
        # previous upload would ignore every later save of the same PDF.
        if time.time() - self.last_modified_time <= DEBOUNCE_SECONDS:
            return

        print(f"File {event.src_path} has been modified")
        print("Start uploading..")
        self.last_modified_time = time.time()
        self.last_file_path = event.src_path

        with lock:
            upload_file(event.src_path)

        # upload_file blocks for the whole browser session. Re-stamp afterwards
        # so that events delivered right after it returns (presumably queued
        # duplicates of the same save; confirm against watchdog's dispatch
        # behavior) still fall inside the debounce window.
        self.last_modified_time = time.time()


if __name__ == "__main__":
    setup_logger()

    # Build the handler before the observer, as its constructor prints the banner
    pdf_handler = PDFHandler()
    watcher = Observer()
    watcher.schedule(pdf_handler, FILE_DIR, recursive=False)
    watcher.start()

    # Keep the main thread idle until Ctrl+C, then stop the observer thread
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
    watcher.join()

使い方

前提条件の項目を準備する
upload_to_speakerdeck.py、watchdog_monitor_local.pyを同じフォルダに保存する
変数を設定する
python /path/to/watchdog_monitor_local.py でスタート
指定したファイルを、指定したフォルダに入れる(エクスポートなど)と、開始する
- Watchdogなしで、Seleniumを起動したほうが便利なときもあるかもしれません

まとめ

SpeakerDeckのスライドを更新したときに自動でアップロードするPythonスクリプトを作りました
AWSでもやってみたのですが、Seleniumが載せづらいとか、あまり便利にならず難しいものだという感想
- ログを逐一みたいとなるとCloudWatchLogsが使いにくい・・・

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up