DaVinci Resolveで字幕の自動追加

Last updated at 2024-07-04 / Posted at 2024-07-03

追記
後から気づきましたが、この方法だと字幕が後から編集できません・・。
とりあえず、`captionFusionCompTools.StyledText = string.gsub(captionText, '\\n', '\n')` のようにして、改行を事前に入れるようにします。

前置き

DaVinci ResolveとVOICEVOXで動画を作っています。作業の流れは以下の通り。

スプレッドシートで台本作成
台本を元にVOICEVOXで音声ファイル作成
DaVinci Resolveに音声ファイルと字幕を打ち込む

最後のステップについて、音声ファイルは全選択してDaVinci Resolveに放り込めばOKです。一方、字幕は普通にやると 1) テキストエフェクトを作成して 2) セリフを打ち込んで 3) クリップの長さを音声ファイルと合わせる、という作業が必要になります。セリフは100個程度あるので、手作業ではやっていられません。この手順を自動化したい。

調べたらこちらの方が自動化スクリプトを作成されていました。

ただ、「字幕の表示時間を変更できない」という問題が指摘されており、100個ある字幕クリップを音声の長さに合わせる作業は虚無なのでなんとかできないか色々トライしてみたところ、DaVinci Resolve APIのアップデートとハックを組み合わせることで解決できたのでやり方をまとめます。

スクリプト

-- Print a start marker when the script begins running.
print("LoadCaptionsWithDuration.lua start")

--- Split a string into the non-empty pieces found between separator characters.
-- @param inputstr string to split
-- @param sep characters placed inside the Lua pattern set `[^...]`;
--            when nil, "%s" (whitespace) is used
-- @return array of pieces in order of appearance; runs of separators never
--         yield empty pieces because the pattern requires at least one character
function mysplit(inputstr, sep)
  local delimiters = sep
  if delimiters == nil then
    delimiters = "%s"
  end
  local fieldPattern = "([^" .. delimiters .. "]+)"
  local pieces = {}
  for piece in string.gmatch(inputstr, fieldPattern) do
    pieces[#pieces + 1] = piece
  end
  return pieces
end

--- Return the final element of an array-like table.
-- @param t table indexed from 1
-- @return the value stored at index #t
function getLastElement(t)
  local lastIndex = #t
  return t[lastIndex]
end

--- Read the run of digits at the very start of a string as a number.
-- @param str text expected to begin with digits (e.g. a clip name)
-- @return the leading digits converted with tonumber, or nil when the
--         string does not start with a digit
function getIndexNumber(str)
  local leadingDigits = string.match(str, "^%d+")
  return tonumber(leadingDigits)
end

--- Convert an "HH:MM:SS:FF" timecode into a frame count.
-- @param timecode text containing four colon-separated numeric fields
--                 (hours, minutes, seconds, frames)
-- @param fps frames per second of timecode; numeric strings are accepted
-- @return (hours * 3600 + minutes * 60 + seconds) * fps + frames
-- Raises a descriptive error when fps is not numeric or when the timecode
-- does not contain the four colon-separated fields. A drop-frame timecode
-- written with ";" before the frame field is rejected that way instead of
-- being silently miscounted.
function timecodeToFrames(timecode, fps)
  local rate = tonumber(fps)
  if rate == nil then
    error("timecodeToFrames: fps must be a number, got " .. tostring(fps), 2)
  end

  local h, m, s, f = string.match(tostring(timecode), "(%d+):(%d+):(%d+):(%d+)")
  if h == nil then
    error("timecodeToFrames: expected HH:MM:SS:FF timecode, got " .. tostring(timecode), 2)
  end

  local totalSeconds = tonumber(h) * 3600 + tonumber(m) * 60 + tonumber(s)
  return totalSeconds * rate + tonumber(f)
end

--- Pause the script by running the external `sleep` command through the shell.
-- @param n delay in seconds; it is passed through tonumber before being
--          appended to the command line, so only numeric text reaches the shell
function sleep(n)
  local seconds = tonumber(n)
  local command = "sleep " .. seconds
  os.execute(command)
end

-- Resolve object handles shared (as globals) by the functions below.
-- Abort early with a readable message when there is nothing to work on,
-- instead of failing later with "attempt to index a nil value".
projectManager = resolve:GetProjectManager()
project = projectManager:GetCurrentProject()
if project == nil then
  error("No project is currently open in DaVinci Resolve")
end
timeline = project:GetCurrentTimeline()
if timeline == nil then
  error("The current project has no active timeline")
end
mediaPool = project:GetMediaPool()
rootFolder = mediaPool:GetRootFolder()
-- Snapshot of the root folder's subfolders, taken once at start-up.
subfolders = rootFolder:GetSubFolderList()

-- Frames per second used for timecode <-> frame conversion.
-- Taken from the timeline's "timelineFrameRate" setting and rounded to the
-- nominal timecode rate (e.g. 23.976 -> 24, 29.97 -> 30); falls back to 30
-- when the setting cannot be read.
fps = 30
do
  local ok, value = pcall(function()
    return timeline:GetSetting("timelineFrameRate")
  end)
  local timelineRate = ok and tonumber(value) or nil
  if timelineRate ~= nil and timelineRate > 0 then
    fps = math.floor(timelineRate + 0.5)
  end
end

--- Look up a folder by name in the global `subfolders` list.
-- @param folderName name compared against each folder's GetName() result
-- @return the first folder whose name equals folderName, or nil if none does
function getSubFolderByName(folderName)
  local match = nil
  for index = 1, #subfolders do
    local candidate = subfolders[index]
    if candidate:GetName() == folderName then
      match = candidate
      break
    end
  end
  return match
end

--- Find the index of a named track on the global `timeline`.
-- @param trackType track type passed through to GetTrackCount / GetTrackName
-- @param trackName name to look for
-- @return index of the first track of that type with that name, or nil
function getTrackIndexByName(trackType, trackName)
  local matchIndex = nil
  for index = 1, timeline:GetTrackCount(trackType) do
    if timeline:GetTrackName(trackType, index) == trackName then
      matchIndex = index
      break
    end
  end
  return matchIndex
end

--- Create one Text+ caption per voice clip and place both back-to-back on the timeline.
--
-- captionList is a flat array of triples: voice index, character name,
-- caption text. For each triple the voice clip whose name starts with that
-- index is looked up in the "Voices" media pool folder, a Text+ title holding
-- the caption text is converted into a Fusion clip stored in the "Captions"
-- folder, and both are appended at the same record frame, with the caption
-- trimmed to the voice clip's duration.
--
-- Triples whose voice clip cannot be found are reported and skipped, as is
-- an incomplete triple at the end of the list.
--
-- @param captionList flat array {index, name, text, index, name, text, ...}
-- @param captionTrackIndex video track index that receives the caption clips
-- @param voiceTrackIndex audio track index that receives the voice clips
function insertCaptionsAndVoices(captionList, captionTrackIndex, voiceTrackIndex)
  local voicesFolder = getSubFolderByName("Voices")
  if voicesFolder == nil then
    print("Voices folder not found")
    return
  end
  mediaPool:SetCurrentFolder(voicesFolder)

  -- Map the number at the start of each clip name to its media pool item.
  local voiceClipList = mediaPool:GetCurrentFolder():GetClipList()
  local voiceClipTable = {}
  for i = 1, #voiceClipList do
    local clip = voiceClipList[i]
    local clipName = clip:GetName()
    local clipNumber = getIndexNumber(clipName)
    if clipNumber == nil then
      -- A nil key would raise "table index is nil"; such clips cannot be matched anyway.
      print("Skipping voice clip without a leading number: " .. tostring(clipName))
    else
      voiceClipTable[clipNumber] = clip
    end
  end

  local captionsFolder = getSubFolderByName("Captions")
  if captionsFolder == nil then
    captionsFolder = mediaPool:AddSubFolder(rootFolder, "Captions")
  end
  if captionsFolder == nil then
    print("Captions folder could not be created")
    return
  end
  -- Fusion clips created below are looked up in the current folder afterwards.
  mediaPool:SetCurrentFolder(captionsFolder)

  -- Record position of the next clip, starting at the timeline's start timecode.
  local offset = timecodeToFrames(timeline:GetStartTimecode(), fps)

  local usableCount = #captionList - (#captionList % 3)
  if usableCount ~= #captionList then
    print("Caption list length is not a multiple of 3; ignoring the incomplete last entry")
  end

  for i = 1, usableCount, 3 do
    -- captionList[i + 1] is the character name; it is not used here.
    local voiceIndex = tonumber(captionList[i])
    local captionText = captionList[i + 2]

    local voiceClip = nil
    if voiceIndex ~= nil then
      voiceClip = voiceClipTable[voiceIndex]
    end

    if voiceClip == nil then
      -- Checked before creating the title so no orphan Fusion clip is left behind.
      print("No voice clip found for index " .. tostring(captionList[i]) .. "; skipping caption")
    else
      local captionFusionTitle = timeline:InsertFusionTitleIntoTimeline("Text+")
      if captionFusionTitle == nil then
        print("Failed to insert a Text+ title into the timeline")
        return
      end
      local captionFusionCompTools = captionFusionTitle:GetFusionCompByIndex(1):GetToolList()[1]
      -- The text cannot be edited once it is wrapped in a Fusion clip, so literal
      -- "\n" sequences are turned into real line breaks up front.
      captionFusionCompTools.StyledText = (string.gsub(captionText, "\\n", "\n"))
      -- Set other caption attributes here, e.g.
      -- captionFusionCompTools.Size = ...

      -- Turn the title into a Fusion clip (which adds it to the media pool),
      -- then remove it from the timeline; it is re-added below at the right
      -- position and length.
      local captionFusionClip = timeline:CreateFusionClip(captionFusionTitle)
      timeline:DeleteClips({captionFusionClip})

      -- Assumes the clip just created is the last entry of the current
      -- (Captions) folder's clip list.
      local captionClipItem = getLastElement(mediaPool:GetCurrentFolder():GetClipList())

      local voiceClipDuration = timecodeToFrames(voiceClip:GetClipProperty()["Duration"], fps)

      mediaPool:AppendToTimeline({{
        mediaPoolItem = captionClipItem,
        startFrame = 0,
        endFrame = voiceClipDuration,
        recordFrame = offset,
        mediaType = 1, -- Video only
        trackIndex = captionTrackIndex
      }})

      mediaPool:AppendToTimeline({{
        mediaPoolItem = voiceClip,
        startFrame = 0,
        endFrame = voiceClipDuration,
        recordFrame = offset,
        mediaType = 2, -- Audio only
        trackIndex = voiceTrackIndex
      }})

      offset = offset + voiceClipDuration

      -- NOTE(review): presumably gives Resolve time to settle between insertions; confirm.
      sleep(0.1)
    end
  end
end

-- Caption input dialog: a window with a multi-line text box plus OK / Cancel.
-- The loop below blocks until one of the handlers calls ExitLoop.

local ui = fu.UIManager
local disp = bmd.UIDispatcher(ui)
local width, height = 400, 200

win = disp:AddWindow({
  ID = "Dialog",
  WindowTitle = "Load Captions",
  Geometry = { 100, 100, width, height },
  Spacing = 5,

  ui:VGroup({
    ID = "root",

    ui:Label({ ID = "L", Text = "Caption Loader" }),

    ui:TextEdit({
      ID = "CaptionList",
      Text = "",
      PlaceholderText = "Character name and Caption text",
    }),

    ui:HGroup({
      ui:Button({ ID = "ButtonOK", Text = "OK" }),
      ui:Button({ ID = "ButtonCancel", Text = "Cancel" }),
    }),
  }),
})

-- Filled in only when OK is pressed; stays nil on Cancel or window close.
local captionData

itm = win:GetItems()

-- OK: capture the text box contents, then leave the event loop.
win.On.ButtonOK.Clicked = function(ev)
  captionData = itm.CaptionList.PlainText
  disp:ExitLoop()
end

-- Cancel: leave the event loop without capturing anything.
win.On.ButtonCancel.Clicked = function(ev)
  disp:ExitLoop()
end

-- Window close button: same as Cancel.
win.On.Dialog.Close = function(ev)
  disp:ExitLoop()
end

win:Show()
disp:RunLoop()
win:Hide()

-- Insert captions

if captionData == nil then
  -- The dialog was cancelled or closed: there is nothing to parse, and
  -- calling string.sub on nil would raise an error.
  print("No caption data entered; nothing to insert")
else
  -- Drop surrounding whitespace, then remove one pair of wrapping double
  -- quotes only when both are present (presumably the quotes a spreadsheet
  -- adds when a multi-line cell is copied). Stripping unconditionally would
  -- eat the first digit of the first index when the text is not quoted.
  captionData = string.match(captionData, "^%s*(.-)%s*$")
  if #captionData >= 2
    and string.sub(captionData, 1, 1) == '"'
    and string.sub(captionData, -1) == '"' then
    captionData = string.sub(captionData, 2, -2)
  end

  -- One list entry per non-empty line: index, character name, caption text, ...
  local captionList = mysplit(captionData, "\n")

  -- Reuse the "Caption" video track if it exists; otherwise add a track,
  -- name it, and look its index up again.
  local captionTrackIndex = getTrackIndexByName("video", "Caption")
  if captionTrackIndex == nil then
    timeline:AddTrack("video")
    timeline:SetTrackName("video", timeline:GetTrackCount("video"), "Caption")
    captionTrackIndex = getTrackIndexByName("video", "Caption")
  end

  -- Same for the "Voice" audio track, created as a mono track.
  local voiceTrackIndex = getTrackIndexByName("audio", "Voice")
  if voiceTrackIndex == nil then
    timeline:AddTrack("audio", "mono")
    timeline:SetTrackName("audio", timeline:GetTrackCount("audio"), "Voice")
    voiceTrackIndex = getTrackIndexByName("audio", "Voice")
  end

  insertCaptionsAndVoices(captionList, captionTrackIndex, voiceTrackIndex)
end

手元の環境(DaVinci Resolve 18.6)でとりあえず動いたものです。UIなど非常に適当、FPSも固定です。

台本はスプレッドシートからコピペしたものを扱えるような感じです。音声ファイルはVoices以下に配置してメディアプールに追加し、さらに各セリフと音声ファイルの対応が取れるようにしておく必要があります。

説明

https://note.com/hitsugi_yukana/n/na5f6617532f1 によると、

2023年の5月頃のアップデートで、MediaPool:AppendToTimeline()は指定の再生時間へ追加できるようになりました。

とのことなので、これを利用します。ただ、Timeline:InsertFusionTitleIntoTimeline()で作った字幕クリップはTimelineItemなのでMediaPool:AppendToTimeline()が使えません。なので、一旦Timeline:CreateFusionClip()でMediaPoolに追加してからMediaPool:AppendToTimeline()をします。手順としては以下になります。

Timeline:InsertFusionTitleIntoTimeline()でFusionテキストの作成と字幕属性のセット
作ったFusionテキストをTimeline:CreateFusionClip()でFusionClipにしてMediaPoolに追加
Timeline:DeleteClips()で最初に作ったFusionテキストをタイムラインから削除（不要かつ邪魔なので）
MediaPoolに追加したFusionClipをMediaPool:AppendToTimeline()でフレーム情報を含めてタイムラインに追加

MediaPool:AppendToTimeline()はフレーム情報としてstartFrame、endFrame、recordFrameの３つを指定できます。正直意味がわかっていませんが、次のように値を与えることでうまくいきました。

startFrame = 0
endFrame = 音声ファイルのフレーム数
recordFrame = 字幕クリップを挿入したい位置

また、追加したいトラックも指定できるので便利です。

感想

謎ハックすぎて良いとか悪いとかよくわかりません。DaVinci ResolveのAPIが物足りないと思いました。

Reference

情報がネット上に散逸していて開発しにくいDaVinci Resolve APIの利用法ですが、DaVinci Resolveと一緒にインストールされる以下のREADME.txtを見るのが一番良さそうです。

C:\ProgramData\Blackmagic Design\DaVinci Resolve\Support\Developer\Scripting\README.txt
/Library/Application Support/Blackmagic Design/DaVinci Resolve/Developer/Scripting/README.txt

こちらも参考にしました。ありがとうございます。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up