More than 1 year has passed since last update.

AviUtlの.exoファイルから話者情報付きテキストを抽出する方法

Posted at 2023-12-08

イントロダクション

ある日、AviUtlでラジオ動画を作っていると天から声が聞こえた気がした。

「アクセシビリティが低いね」。

確かに自分はラジオ動画が好きだけれど、ラジオ動画だと情報へのアクセス性が悪い気がした。
テキスト情報は切り出して別途上げるべきではないだろうか？
こうして字幕情報を切り出すことにした。

コンテンツの概要

はじめに

・AviUtlって？

AviUtlは動画編集ソフトウェアのうちの1つである。
使い始めた当時は中級者向けといわれていたがWindowsムービーメーカーとかもう見ないし中級者向けでもないのかも？

・exoファイルって何？

AviUtlが出力できるファイルのフォーマットのうちの1つである。
このファイルをインポートすれば編集内容が再現できる。
ちなみに編集ファイルは.aupなので別物らしい。
詳しくは知らないが、.exoファイルは中身が日本語で書かれているので扱いやすかった。

.exoファイルの中身

exoファイルは[]でアイテムに対するインデックスが記述されており、その下にプロパティが続く。
レイヤー1であればプロパティの中にlayer=1などと書かれているので非常に分かりやすい。
自分の動画ではレイヤー1にグループ制御を配置して縁取りを付与しており、レイヤー2にテキストを置いている。
話者の色で話者を分けているのだった。

ちなみにテキストだけは日本語で書かれていない。
16進数で表現されているということだったので、読める文字列に戻すにはデコードが必要だった。

プログラムソースコード

import sys
import os
import binascii
import time
import tkinter
import tkinter.filedialog

# Show a file-selection dialog and return the chosen file path(s)
def fileChoceCommand(selectType, defaultDir=os.path.abspath(os.path.dirname(__file__))):
    """
    Show a file-selection dialog and return the chosen path(s).

    Parameters
    ----------
    selectType : int
        Pass 0 to select a single file; any other value allows selecting
        several files.
    defaultDir : str
        Directory the dialog opens in. The default is the directory that
        contains this script (computed once, when the function is defined).

    Returns
    ----------
    selectFilePath : any
        The value returned by ``askopenfilename`` when ``selectType`` is 0,
        otherwise the ``askopenfilenames`` result converted to a list.
    """
    # The dialog needs a Tk root window; keep it hidden so that only the
    # dialog itself is shown.
    root = tkinter.Tk()
    root.withdraw()
    fTyp = [("", "*")]
    try:
        if selectType == 0:
            selectFilePath = tkinter.filedialog.askopenfilename(
                filetypes=fTyp, initialdir=defaultDir)
        else:
            selectFilePath = list(tkinter.filedialog.askopenfilenames(
                filetypes=fTyp, initialdir=defaultDir))
    finally:
        # Release the hidden root window; without this it stays alive for
        # the rest of the process.
        root.destroy()

    return selectFilePath

# exoファイル内のtext=に記述された16進数を渡すことでテキスト化できる
def decode_text(encoded_hex):
    """
    Decode the hexadecimal string stored after ``text=`` in an .exo file.

    Parameters
    ----------
    encoded_hex : str
        Hexadecimal representation of the UTF-16 encoded text.

    Returns
    ----------
    text : str
        The decoded text up to (not including) the first NUL character.

    Raises
    ----------
    binascii.Error
        If ``encoded_hex`` is not valid hexadecimal.
    UnicodeDecodeError
        If the bytes are not valid UTF-16.
    """
    # Hex string -> raw bytes
    byte_data = binascii.unhexlify(encoded_hex)
    # Decode as little-endian explicitly. The plain 'UTF-16' codec sniffs a
    # BOM and otherwise falls back to the host's native byte order, which
    # makes the result platform-dependent.
    # NOTE(review): assumes the .exo text field is BOM-less UTF-16LE, which is
    # what the original produced on little-endian machines.
    text = byte_data.decode('utf-16-le')
    # Everything from the first NUL onward is discarded.
    return text.partition('\x00')[0]

# インプットパスを利用してアウトプットパスを作成
def create_path(exo_file_path):
    """
    Build the output text-file path from the input .exo file path.

    The output file is placed in the same directory as the input and is
    named ``<input name without extension>_output.txt``.

    Parameters
    ----------
    exo_file_path : str
        Path of the input .exo file.

    Returns
    ----------
    output_file_path : str
        Path of the text file to write.

    Raises
    ----------
    SystemExit
        If the input file does not have an ``.exo`` extension. A message is
        printed and shown for three seconds before exiting.
    """
    # Split the input path into directory and file name
    input_directory, input_filename = os.path.split(exo_file_path)

    # Split the file name into stem and extension
    file_name, file_extension = os.path.splitext(input_filename)

    # Compare case-insensitively so that "MOVIE.EXO" is accepted as well.
    if file_extension.lower() != '.exo':
        print('対象のファイルは.exoファイルではありません')
        # Keep the message on screen for a moment before the process exits.
        time.sleep(3)
        # Non-zero status: the run did not succeed.
        sys.exit(1)

    # Build the output path next to the input file
    return os.path.join(input_directory, f"{file_name}_output.txt")

# Read the .exo file, extract its text (with speaker labels when group controls exist) and write it to the output path
def extract_text_from_exo(exo_file_path, output_file_path, encoding='cp932'):
    """
    Extract the text objects of an .exo file and write them to a text file.

    Text objects and group-control objects are collected from the file. When
    group-control objects are present, the user is asked (via ``input``) for a
    speaker name for each distinct group colour, and every text whose frame
    range falls inside a group is written with that group's speaker in front
    of the first text of the group (``"<speaker>\\t <text>"``). Texts that
    belong to no group are not written in that case. When there are no
    group-control objects, every text is written as a plain line.

    Parameters
    ----------
    exo_file_path : str
        Path of the .exo file to read.
    output_file_path : str
        Path of the UTF-8 text file to write.
    encoding : str
        Encoding used to read the .exo file. The default ``'cp932'`` is what
        ``open()`` used implicitly on a Japanese-locale Windows; passing it
        explicitly makes the script behave the same on other systems.
    """
    with open(exo_file_path, 'r', encoding=encoding) as file:
        exo_objects = _parse_exo_objects(file)

    text_array, group_array, colors = _collect_texts_and_groups(exo_objects)

    # Ask for one speaker name per distinct group colour (first-seen order).
    speaker_by_color = {}
    for color in colors:
        speaker_by_color[color] = input(
            'カラー<{}>に対応する話者を入力してください: '.format(color))

    if not group_array:
        # No group control at all: write the texts without speaker labels.
        with open(output_file_path, 'w', encoding='utf-8') as file:
            for _start, _end, text in text_array:
                file.write(text + '\n')
        print('話者情報なしの書き込み処理を実施しました')
        return

    labelled_lines = _attach_speakers(text_array, group_array, speaker_by_color)
    with open(output_file_path, 'w', encoding='utf-8') as file:
        for text in labelled_lines:
            file.write(text + '\n')


def _parse_exo_objects(lines):
    """Return one ``{key: value}`` dict per ``[N]`` object found in *lines*.

    The properties of the ``[N]`` section and of its ``[N.M]`` sub-sections
    are merged into the same dict; when a key occurs more than once the first
    occurrence wins. Sections whose name does not start with a number
    (e.g. ``[exedit]``) are ignored.
    """
    objects = []
    current = None
    for raw_line in lines:
        line = raw_line.strip()
        if line.startswith('[') and line.endswith(']'):
            section = line[1:-1]
            if section.isdigit():
                # "[N]" starts a new object.
                current = {}
                objects.append(current)
            elif not section.split('.')[0].isdigit():
                # Not "[N.M]" either: a section that belongs to no object.
                current = None
            continue
        if current is None:
            continue
        key, separator, value = line.partition('=')
        if separator:
            # Exact key match, so e.g. "blend=" is never mistaken for "end=".
            current.setdefault(key.strip(), value.strip())
    return objects


def _collect_texts_and_groups(exo_objects):
    """Split parsed objects into text entries, group entries and group colours.

    Returns ``(text_array, group_array, colors)`` where ``text_array`` holds
    ``[start, end, decoded_text]``, ``group_array`` holds
    ``[start, end, color]`` and ``colors`` lists each distinct group colour
    once, in first-seen order. Objects that are neither text nor group
    control, or that lack a start/end frame or their text/colour value, are
    skipped.
    """
    text_array = []
    group_array = []
    colors = []
    for properties in exo_objects:
        start_frame = properties.get('start', '')
        end_frame = properties.get('end', '')
        if not start_frame or not end_frame:
            continue
        # The first "_name" of an object tells what kind of object it is.
        object_name = properties.get('_name', '')
        if 'テキスト' in object_name:
            hex_text = properties.get('text', '')
            if hex_text:
                text_array.append([start_frame, end_frame, decode_text(hex_text)])
        elif 'グループ制御' in object_name:
            color = properties.get('color', '')
            if color:
                group_array.append([start_frame, end_frame, color])
                # Colours are tracked separately for speaker labelling.
                if color not in colors:
                    colors.append(color)
    return text_array, group_array, colors


def _attach_speakers(text_array, group_array, speaker_by_color):
    """Return the output lines for texts that fall inside a group.

    For each group, texts are visited in file order. A text belongs to the
    group when it starts at or after the group's start and ends before the
    group's end, or when it ends exactly on the group's end frame; the latter
    also ends the scan for that group. Only the first text of a group gets the
    ``"<speaker>\\t "`` prefix.
    """
    # Convert the frame numbers once instead of on every comparison.
    texts = [(int(start), int(end), text) for start, end, text in text_array]
    labelled_lines = []
    for group_start, group_end, group_color in group_array:
        first_frame = int(group_start)
        last_frame = int(group_end)
        speaker = speaker_by_color[group_color]
        written = 0
        for text_start, text_end, text_info in texts:
            inside_group = first_frame <= text_start and text_end < last_frame
            closes_group = text_end == last_frame
            if inside_group or closes_group:
                if written == 0:
                    labelled_lines.append('{}\t {}'.format(speaker, text_info))
                else:
                    labelled_lines.append(text_info)
                written += 1
            if closes_group:
                break
    return labelled_lines

def format_dialogue(input_file_path, output_file_path):
    """
    Join continuation lines onto their speaker line and remove spaces.

    A line that contains a tab is treated as the start of a speaker's
    utterance. A line without a tab is appended to the preceding speaker
    line. Lines without a tab that come before any speaker line (for example
    a file that was written without speaker information) are kept as separate
    lines instead of raising ``IndexError``. Finally every ASCII space is
    removed from each line.

    Parameters
    ----------
    input_file_path : str
        Path of the UTF-8 text file to read.
    output_file_path : str
        Path of the UTF-8 text file to write. May be the same as
        ``input_file_path``; the input is read completely before writing.
    """
    with open(input_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    formatted_lines = []
    seen_speaker_line = False
    for line in lines:
        stripped = line.strip()
        if '\t' in line:
            # A tab marks a line that starts with a speaker name: keep it.
            seen_speaker_line = True
            formatted_lines.append(stripped)
        elif seen_speaker_line:
            # Continuation of the previous utterance. No separator is added
            # because all spaces are removed below anyway.
            formatted_lines[-1] += stripped
        else:
            # Nothing to attach this line to yet: keep it on its own.
            formatted_lines.append(stripped)

    # Write the edited content, dropping every ASCII space
    with open(output_file_path, 'w', encoding='utf-8') as file:
        for line in formatted_lines:
            file.write(line.replace(' ', '') + '\n')

# ドラッグアンドドロップされたファイルがあればパスを取得
# ドラッグアンドドロップされたファイルがなければファイルを選択
# print(len(sys.argv))
# A file dropped onto the script arrives as the first command-line argument;
# when nothing was dropped, ask for the file with the selection dialog.
dropped_files = sys.argv[1:]
exo_file_path = dropped_files[0] if dropped_files else fileChoceCommand(0)

# Path of the text file that will be written
output_file_path = create_path(exo_file_path)

# Extract the text, then tidy the written file in place
print('.exoファイルのレイヤー1にグループ制御、レイヤー2に文字を入力してください')
extract_text_from_exo(exo_file_path, output_file_path)
format_dialogue(output_file_path, output_file_path)
print('Text file is created')

スクリプトの使い方

AviUtl側の作業

レイヤー1にグループ制御を配置
レイヤー2にテキストを配置
.exoファイルを出力

スクリプトの作業

本pythonファイルに対象の.exoファイルをドラッグアンドドロップする。
（ドラッグアンドドロップせずダブルクリックなどで起動するとファイル選択ダイアログが開くので.exoファイルを選択する。）
グループ制御で指定されたカラーにそれぞれ付与する名称を入力する
（例えばシアン・黄色のグループ制御を作っている場合
シアンにさかなマン、黄色に紅葉男と名称をつける。）

結果と利点

話者つきでテキストが抽出できた。ラジオ動画の内容をnoteに投稿した。これでアクセシビリティは守られたか。

まとめとこれから

実はテキストを抽出するだけであればすでに.exeファイルを作って公開している方がインターネット上にいた。でも、話者つきであったり今後のカスタマイズを考えると自分で作ったほうがいいと思った。
本プログラムを作成する際にChatGPTを使おうとしたが、.exoファイルの解析にどうしても正規表現を使おうとしてしまい「解析に失敗しました」が5回くらい出たので、もう使うのをやめた。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up