0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

ローカルで音声データを素早く文字起こしする手順

0
Last updated at Posted at 2026-03-17

MacBookの画面録画から音声のみを抽出した後にテキストを要約する方法

m4a

wav変換(16kHz mono)

無音除去

5分ごとに分割

Whisperで文字起こし

1つのテキストに保存

フォルダ構成
project/
├── auto_whisper.py
├── input/
│   ├── a.m4a
│   ├── b.m4a
│   └── c.m4a
└── output/
import os
import subprocess
import glob
from faster_whisper import WhisperModel

# ----- OpenMP workaround -----
# NOTE(review): this is set after faster_whisper has already been imported
# above; if the OpenMP runtime reads the variable at import time it may need
# to move before the imports -- confirm.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# ----- Settings -----
INPUT_DIR = "./input"    # scanned for *.m4a recordings
OUTPUT_DIR = "./output"  # receives intermediate wav files and transcripts
CHUNK_SEC = 5 * 60       # length of each transcription chunk, in seconds

# The output directory must exist before ffmpeg writes into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ----- Model -----
# "small" model on CPU with int8 compute, 8 CPU threads and a single worker.
model = WhisperModel("small", device="cpu", compute_type="int8", cpu_threads=8, num_workers=1)

# ----- Collect the m4a inputs -----
files = glob.glob(INPUT_DIR + "/*.m4a")

def _run_ffmpeg(*args):
    """Run ``ffmpeg -y <args>`` and raise CalledProcessError on a non-zero exit."""
    subprocess.run(["ffmpeg", "-y", *args], check=True)


# Process every input recording: convert to wav, strip silence, split into
# CHUNK_SEC-long chunks, transcribe each chunk, and write one transcript file
# per recording into OUTPUT_DIR.
for input_file in files:
    print(f"\n=== 処理開始: {input_file} ===")

    base = os.path.splitext(os.path.basename(input_file))[0]
    wav_file = f"{OUTPUT_DIR}/{base}.wav"
    cleaned_wav = f"{OUTPUT_DIR}/{base}_clean.wav"
    chunk_pattern = f"{OUTPUT_DIR}/{base}_chunk_%03d.wav"
    # Escape the literal part so glob metacharacters in the file name
    # ("[", "]", "*", "?") cannot match unrelated files.
    chunk_glob = glob.escape(f"{OUTPUT_DIR}/{base}_chunk_") + "*.wav"
    output_txt = f"{OUTPUT_DIR}/{base}.txt"

    try:
        # Step 1: convert to 16 kHz mono wav.
        _run_ffmpeg("-i", input_file, "-ar", "16000", "-ac", "1", wav_file)

        # Step 2: remove silence (1 s or longer below -40 dB).
        _run_ffmpeg(
            "-i", wav_file,
            "-af", "silenceremove=stop_periods=-1:stop_duration=1:stop_threshold=-40dB",
            cleaned_wav,
        )

        # Drop chunks left over from a previous run; otherwise a shorter
        # recording with the same name would have stale chunks appended to
        # its transcript.
        for stale_chunk in glob.glob(chunk_glob):
            os.remove(stale_chunk)

        # Step 3: split into CHUNK_SEC-long pieces without re-encoding.
        _run_ffmpeg(
            "-i", cleaned_wav,
            "-f", "segment",
            "-segment_time", str(CHUNK_SEC),
            "-c", "copy",
            chunk_pattern,
        )
    except subprocess.CalledProcessError as err:
        # Skip this recording rather than transcribing missing or stale
        # intermediate files; the remaining inputs are still processed.
        print(f"ffmpeg失敗 (exit {err.returncode}) のためスキップ: {input_file}")
        continue

    # Step 4: transcribe each chunk and shift its timestamps by the chunk's
    # start position. Timestamps refer to the silence-stripped audio, not the
    # original recording, and assume every chunk before the current one is
    # exactly CHUNK_SEC long.
    chunk_files = sorted(glob.glob(chunk_glob))

    with open(output_txt, "w", encoding="utf-8") as f:
        for index, chunk in enumerate(chunk_files):
            print(f"処理中: {chunk}")

            offset = index * CHUNK_SEC
            segments, _ = model.transcribe(
                chunk,
                beam_size=1,
                vad_filter=True
            )

            for s in segments:
                start = s.start + offset
                end = s.end + offset

                f.write(f"[{start:.2f}-{end:.2f}] {s.text}\n")

    print(f"完了: {output_txt}")
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?