pip install youtube_transcript_api googletrans==4.0.0-rc1 h2==3.*
### Easy and lazy compositing
case: google colab
from youtube_transcript_api import YouTubeTranscriptApi
from google.colab import files
#import time
import sys
from urllib.parse import urlparse, parse_qs
# Video to process. `args` is the list of URLs the script walks through;
# `video_id` is only a placeholder here and is assigned from the URL later.
urltext = 'https://www.youtube.com/watch?v=JV7GqhDQosE'
args = [urltext]
video_id = ''
# Two blank lines on the console before any other output.
print('\n')
def extract_video_id(url):
    """Return the YouTube video id contained in *url*, or ``None``.

    Recognised URL shapes:
      - ``https://youtu.be/<id>``
      - ``https://www.youtube.com/watch?v=<id>``
      - ``https://www.youtube.com/embed/<id>``
      - ``https://www.youtube.com/v/<id>``
      - ``https://www.youtube.com/shorts/<id>``

    The ``youtube.com``, ``www.youtube.com`` and ``m.youtube.com`` hosts are
    accepted. Anything else, or a recognised shape with no id in it, yields
    ``None`` instead of raising.
    """
    query = urlparse(url)
    if query.hostname == 'youtu.be':
        # The id is the whole path without its leading slash.
        return query.path[1:] or None
    if query.hostname in {'www.youtube.com', 'youtube.com', 'm.youtube.com'}:
        if query.path == '/watch':
            # .get() avoids a KeyError when the URL has no ?v= parameter.
            ids = parse_qs(query.query).get('v')
            return ids[0] if ids else None
        for prefix in ('/embed/', '/v/', '/shorts/'):
            if query.path.startswith(prefix):
                # '/embed/<id>/...' splits into ['', 'embed', '<id>', ...].
                return query.path.split('/')[2] or None
    # Unrecognised host or path.
    return None
# Resolve the video id for every URL in `args`. When several URLs are given,
# only the id of the last one is still bound after the loop and used below.
for url in args:
    video_id = extract_video_id(url)
    print('youtube video_id:', video_id)

# Fetch the English transcript and flatten each caption to one clean line:
# surrounding whitespace is stripped, carriage returns are dropped and
# embedded newlines become spaces. Only the 'text' field of each entry is read.
line = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
text_list = [
    entry['text'].strip().replace('\r', '').replace('\n', ' ')
    for entry in line
]
del line

filename = 'subtitle.txt' ## print subtitle text to this file
# NOTE(review): the stdout redirect below is disabled, so every print() in
# this block goes to the console and the file is only created/truncated,
# never written. Re-enable the redirect (or pass file=f) to fill the file.
with open(filename, 'w') as f:
    ##sys.stdout = f # stdout to file
    print('youtube video_id:', video_id)
    print()
    print("haywhnk-A.K.A-@dauuricus")
    print("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
    print("YouTube captions")
    print("- - - - - - - - - - - - - - - - - - - YouTube - - - - - - - - - - - - - - - - - - -")
    print()
    print()

    # Original English captions, numbered from 1.
    line = YouTubeTranscriptApi.list_transcripts(video_id)
    transcript = line.find_transcript(['en'])
    caption_line = [entry['text'] for entry in transcript.fetch()]
    for number, caption in enumerate(caption_line, start=1):
        print(number, ' ', caption)
    print()
    print()
    print("************************************************************************************")
    print()
    print("Youtube captions")
    print("- - - - - - - - - - - - - - - - - - translated - - - - - - - - - - - - - - - - - - -")
    print()
    print()

    # The same transcript translated to Japanese via transcript.translate().
    translated = transcript.translate('ja')
    for number, entry in enumerate(translated.fetch(), start=1):
        print(number, ' ', entry['text'])
## print()
## print("-----------------------------------------------------------------------------------")
## print()
## print("captions text compositimg")
## print("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
## print()
## print()
def line_edit2(textlines): ##2 lines to 1 line
    """Join every two consecutive lines into one, separated by a space.

    Args:
        textlines: iterable of strings.

    Returns:
        A new list in which items 0-1, 2-3, ... are merged. With an odd
        number of inputs the last line is kept on its own.
    """
    # Materialise once so any iterable (not only lists) can be sliced.
    lines = list(textlines)
    return [' '.join(lines[i:i + 2]) for i in range(0, len(lines), 2)]
def line_edit3(textlines): ##3 lines to 1 line
    """Join every three consecutive lines into one, separated by spaces.

    Args:
        textlines: iterable of strings.

    Returns:
        A new list in which items 0-2, 3-5, ... are merged. A trailing
        group of one or two lines is joined the same way.
    """
    # Materialise once so any iterable (not only lists) can be sliced.
    lines = list(textlines)
    return [' '.join(lines[i:i + 3]) for i in range(0, len(lines), 3)]
def line_edit(textlines): ##2 lines to 1 line
    """Join every two consecutive lines into one, separated by a space.

    Args:
        textlines: iterable of strings.

    Returns:
        A new list in which items 0-1, 2-3, ... are merged. With an odd
        number of inputs the last line is kept on its own.
    """
    # Materialise once so any iterable (not only lists) can be sliced.
    lines = list(textlines)
    return [' '.join(lines[i:i + 2]) for i in range(0, len(lines), 2)]
# Visual separators between the caption listing and the condensed text.
print()
print()
print("************************************************************************************")
print()
print()
print()
print()
print("************************************************************************************")
print("shrink text")
print()
print()
print()
print()
print()

# Condense the cleaned captions into paragraphs: two passes of 3-to-1
# followed by two passes of 2-to-1, i.e. up to 3*3*2*2 = 36 captions per line.
text_compo = line_edit3(line_edit3(text_list))
text_compo2 = line_edit2(line_edit2(text_compo))
# NOTE(review): the source lost its indentation; the loop body is assumed to
# be the single print of the paragraph.
for paragraph in text_compo2:
    print(paragraph)
print()
print("************************************************************************************")
print()
print()
print("************************************************************************************")
print()
print("Thank you.")
#sys.stdout = original_stdout # stdout back
##files.download(filename)
#import re
import h2.connection
import h2.config
from googletrans import Translator
#import sys
#####uploaded = files.upload()
####filename = ''
####for fn in uploaded.keys():
#### print('User uploaded file "{name}" with length {length} bytes'.format(
#### name=fn, length=len(uploaded[fn])))
#### filename = fn
#filename = 'subtitle.txt'
#args= sys.argv
##args = [('translate.py'),filename]
##print('open '+args[1])
##with open(args[1]) as f: # uploaded file
## line = f.readlines()
##line[:] = [l.strip() for l in line]
##line[:] = [l.rstrip('\n') for l in line]
##line[:] = [a for a in line if a != '']
##line[:] = [l.replace('\n',' ') for l in line]
##line[:] = [l.replace('\r',' ') for l in line]
#print(line)
#print()
####for line_num,l in enumerate(line):
#### if re.search(r'.*?i'm$',l):
#### print(line_num,' ',l)
#### elif re.search(r'.*?to/Z',l):
#### print(line_num,' ',l)
#### if re.search(r'.*?the$',l):
#### print(line_num,' ',l)
#### elif re.search(r'.*?the/Z',l):
#### print(line_num,' ',l)
#for line_num,l in enumerate(line):
# print(line_num,' ',l)
# Translate every condensed paragraph to Japanese, echo it to the console and
# save it to `filename`, one translated paragraph per line.
translator = Translator()
num = 20  # index (1-based) at which the Translator is next recreated
filename = 'translated.txt'
backup_stdout = sys.stdout
print("translating...")
print()
# utf-8 is given explicitly so the Japanese text can be written regardless of
# the platform's default encoding.
with open(filename, 'w', encoding='utf-8') as f:
    for count, source_text in enumerate(text_compo2):
        translated = translator.translate(source_text, dest='ja')
        print(translated.text)
        # Previously nothing was written, so the file stayed empty even
        # though "saving..." is reported below.
        f.write(translated.text + '\n')
        if count + 1 >= num:
            # Start over with a fresh Translator every 20 paragraphs.
            # NOTE(review): presumably a workaround for long-lived sessions or
            # rate limiting in googletrans; confirm.
            del translator
            num = num + 20
            translator = Translator()
del translator
print("saving...", filename)
# files.download(filename) # translated.txt
case: not google colab
youtube-translate.py
from youtube_transcript_api import YouTubeTranscriptApi
#import time
import sys
from urllib.parse import urlparse, parse_qs
# Video to process. `args` is the list of URLs the script walks through;
# `video_id` is only a placeholder here and is assigned from the URL later.
urltext = 'https://www.youtube.com/watch?v=dQqkHjvj2zU'
args = [urltext]
video_id = ''
# Two blank lines on the console before any other output.
print('\n')
def extract_video_id(url):
    """Return the YouTube video id contained in *url*, or ``None``.

    Recognised URL shapes:
      - ``https://youtu.be/<id>``
      - ``https://www.youtube.com/watch?v=<id>``
      - ``https://www.youtube.com/embed/<id>``
      - ``https://www.youtube.com/v/<id>``
      - ``https://www.youtube.com/shorts/<id>``

    The ``youtube.com``, ``www.youtube.com`` and ``m.youtube.com`` hosts are
    accepted. Anything else, or a recognised shape with no id in it, yields
    ``None`` instead of raising.
    """
    query = urlparse(url)
    if query.hostname == 'youtu.be':
        # The id is the whole path without its leading slash.
        return query.path[1:] or None
    if query.hostname in {'www.youtube.com', 'youtube.com', 'm.youtube.com'}:
        if query.path == '/watch':
            # .get() avoids a KeyError when the URL has no ?v= parameter.
            ids = parse_qs(query.query).get('v')
            return ids[0] if ids else None
        for prefix in ('/embed/', '/v/', '/shorts/'):
            if query.path.startswith(prefix):
                # '/embed/<id>/...' splits into ['', 'embed', '<id>', ...].
                return query.path.split('/')[2] or None
    # Unrecognised host or path.
    return None
# Resolve the video id for every URL in `args`. When several URLs are given,
# only the id of the last one is still bound after the loop and used below.
for url in args:
    video_id = extract_video_id(url)
    print('youtube video_id:', video_id)
#video_id = 'PNZtCP4K8AE' ## youtube video_id example

# Fetch the English transcript and flatten each caption to one clean line:
# surrounding whitespace is stripped, carriage returns are dropped and
# embedded newlines become spaces. Only the 'text' field of each entry is read.
line = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
text_list = [
    entry['text'].strip().replace('\r', '').replace('\n', ' ')
    for entry in line
]
del line

filename = 'subtitle.txt' ## print subtitle text to this file
# NOTE(review): the stdout redirect below is disabled, so every print() in
# this block goes to the console and the file is only created/truncated,
# never written. Re-enable the redirect (or pass file=f) to fill the file.
with open(filename, 'w') as f:
    ##sys.stdout = f # stdout to file
    print('youtube video_id:', video_id)
    print()
    print("haywhnk-A.K.A-@dauuricus")
    print("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
    print("YouTube captions")
    print("- - - - - - - - - - - - - - - - - - - YouTube - - - - - - - - - - - - - - - - - - -")
    print()
    print()

    # Original English captions, printed as fetched (unnumbered).
    line = YouTubeTranscriptApi.list_transcripts(video_id)
    transcript = line.find_transcript(['en'])
    caption_line = []  # kept for parity with the Colab variant; not filled here
    for entry in transcript.fetch():
        print(entry['text'])
## caption_line.append(dict_obj['text'])
## print()
## print()
## print("************************************************************************************")
## print()
## print("Youtube captions")
## print("- - - - - - - - - - - - - - - - - - translated - - - - - - - - - - - - - - - - - - -")
## print()
## print()
## translated = transcript.translate('ja')
## for count, dict_obj in enumerate(translated.fetch()):# japanese
## print( count+1, dict_obj['text'] )
## print()
## print("-----------------------------------------------------------------------------------")
## print()
## print("captions text compositimg")
## print("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
## print()
## print()
def line_edit2(textlines): ##2 lines to 1 line
    """Join every two consecutive lines into one, separated by a space.

    Args:
        textlines: iterable of strings.

    Returns:
        A new list in which items 0-1, 2-3, ... are merged. With an odd
        number of inputs the last line is kept on its own.
    """
    # Materialise once so any iterable (not only lists) can be sliced.
    lines = list(textlines)
    return [' '.join(lines[i:i + 2]) for i in range(0, len(lines), 2)]
def line_edit3(textlines): ##3 lines to 1 line
    """Join every three consecutive lines into one, separated by spaces.

    Args:
        textlines: iterable of strings.

    Returns:
        A new list in which items 0-2, 3-5, ... are merged. A trailing
        group of one or two lines is joined the same way.
    """
    # Materialise once so any iterable (not only lists) can be sliced.
    lines = list(textlines)
    return [' '.join(lines[i:i + 3]) for i in range(0, len(lines), 3)]
def line_edit(textlines): ##2 lines to 1 line
    """Join every two consecutive lines into one, separated by a space.

    Args:
        textlines: iterable of strings.

    Returns:
        A new list in which items 0-1, 2-3, ... are merged. With an odd
        number of inputs the last line is kept on its own.
    """
    # Materialise once so any iterable (not only lists) can be sliced.
    lines = list(textlines)
    return [' '.join(lines[i:i + 2]) for i in range(0, len(lines), 2)]
### print()
### print()
### print("************************************************************************************")
### print()
### print()
## for count, l in enumerate(text_list):
## print(count+1,l)
# Visual separator before the condensed text.
print()
print()
print("************************************************************************************")
print("shrink text")
print()
print()
print()
print()
print()

# Condense the cleaned captions into paragraphs: 2-to-1, then 3-to-1, then
# two more passes of 2-to-1, i.e. up to 2*3*2*2 = 24 captions per line.
text_compo = line_edit3(line_edit2(text_list))
text_compo2 = line_edit2(line_edit2(text_compo))
# NOTE(review): the source lost its indentation; the loop body is assumed to
# be the single print of the paragraph.
for paragraph in text_compo2:
    print(paragraph)
print()
print("************************************************************************************")
print()
print()
print("************************************************************************************")
print()
print("Thank you.")
#sys.stdout = original_stdout # stdout back
#import re
import h2.connection
import h2.config
from googletrans import Translator
# Translate every condensed paragraph to Japanese, echo it to the console and
# save it to `filename`, one translated paragraph per line.
translator = Translator()
num = 20  # index (1-based) at which the Translator is next recreated
filename = 'translated.txt'
backup_stdout = sys.stdout
print("translating...")
# utf-8 is given explicitly so the Japanese text can be written regardless of
# the platform's default encoding.
with open(filename, 'w', encoding='utf-8') as f:
    # The former `if count < 0: continue` guard was unreachable (enumerate
    # starts at 0) and has been dropped.
    for count, source_text in enumerate(text_compo2):
        translated = translator.translate(source_text, dest='ja')
        print(translated.text)
        # Previously nothing was written, so the file stayed empty even
        # though "saving..." is reported below.
        f.write(translated.text + '\n')
        if count + 1 >= num:
            # Start over with a fresh Translator every 20 paragraphs.
            # NOTE(review): presumably a workaround for long-lived sessions or
            # rate limiting in googletrans; confirm.
            del translator
            num = num + 20
            translator = Translator()
del translator
print("saving...", filename)