More than 3 years have passed since last update.

YouTubeのURLから字幕を生成

Posted at 2021-06-17

YouTubeのURLから字幕を生成

https://qiita.com/saber72237/items/9528f5979b845a94f730
上記の記事の手順で取得したテキストファイルのパスを、下記スクリプトの変数 path に設定する(このファイルは「url」列を持つタブ区切りテキストとして読み込まれる)。
path = '？？？？'
字幕ファイルは、Chromeのダウンロードフォルダに保存される。

jimaku.py

from bs4 import BeautifulSoup
import urllib.request
import re
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import csv
import time
import datetime
import pandas as pd
from selenium.webdriver.common.action_chains import ActionChains

# Path to the input text file. It is loaded below with pandas.read_table, so it
# must be a delimited text file (tab-separated by default) with a "url" column.
path = '？？？？'

# Load the list of video URLs to process.
data = pd.read_table(path, encoding="utf-8")
urls = data["url"]

# Chrome options. Headless mode is left disabled; uncomment the next line to
# enable it.
options = Options()
# options.add_argument('--headless')


# Start Chrome through the local chromedriver binary. The path is a raw string
# so the backslashes of the Windows path are never interpreted as escapes.
driver = webdriver.Chrome(executable_path=r"C:\code\python\chromedriver.exe", options=options)
# Open the DownSub top page, which holds the URL input form.
driver.get("https://downsub.com/")

# Timestamped log file collecting the pages for which the download button
# could not be clicked.
now = datetime.datetime.now()
filename = now.strftime('%Y%m%d_%H%M%S') + '.txt'

# The context manager guarantees the log is flushed and closed even if a
# Selenium call raises partway through the loop.
with open(filename, 'a', encoding='cp932', errors='ignore') as f:
    for url in urls:
        # Fill in the form: go back to the first window, clear whatever is
        # in the "url" field (select-all + delete), then type the next URL.
        sleep(1)
        driver.switch_to.window(driver.window_handles[0])
        sleep(1)
        driver.find_element_by_name("url").send_keys(Keys.CONTROL + "a")
        driver.find_element_by_name("url").send_keys(Keys.DELETE)
        driver.find_element_by_name("url").send_keys(url)
        sleep(1)
        # Submit the form via the button appended to the input field.
        driver.find_element_by_xpath("//div[@class='v-input__append-outer']").click()
        sleep(1)
        try:
            # Click the first download button on the result page.
            driver.find_element_by_xpath("//button[@class='ma-1 download-button v-btn v-btn--depressed v-btn--flat v-btn--outlined theme--light v-size--default primary--text']").click()
        except Exception:
            # Best effort: record the page that failed and carry on with the
            # next URL. `Exception` (rather than a bare except) lets Ctrl-C
            # and interpreter exit still stop the script.
            f.write(driver.current_url)
            f.write("\n")
        sleep(1)

# NOTE(review): the browser is not closed here; presumably so that downloads
# still in progress can finish -- confirm before adding driver.quit().

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up