YOLO11を使用した物体検出(動画)、CSVファイル作成（初心者向け）

Posted at 2025-02-20

YOLO11を用いた動画解析

一定間隔でのオブジェクト検出とデータ保存

処理概要

roboflowを利用してアノテーションデータを作成（別途記述）
保存済み動画のフレームを一定間隔ごとに取得
YOLO11でオブジェクト検出を実行
検出結果（バウンディングボックス座標・カテゴリなど）をCSVファイルに記録
バウンディングボックスを描画した画像を保存
(画像ファイル名にCSVへの書き込み時間を付加し紐づけを行っています)

作成環境

Windows 11 Pro / メモリ16GB / Core i5（第13世代）
python 3.11.6
ultralytics 8.3.53
opencv 4.10.0.84

プログラムはChatGPT4oを利用して作成しています。
（ミスも多いため手作業による修正は必須です）

動画切り出し画像（jpg）

バウンディングボックス情報（CSV）

プログラム（コメントはChatGPTによる自動作成）

movie_time_ver.py

from ultralytics import YOLO  # YOLOモデルを使用するためのライブラリ
from datetime import datetime, timedelta  # 現在時刻の取得や時間間隔の計算に使用
import cv2  # OpenCVライブラリ：画像や動画の処理
import csv  # CSVファイルの書き込み用ライブラリ
import os  # ファイル操作（存在確認・ディレクトリ作成など）
import time  # 時間制御用ライブラリ

# ================================
# Settings (file paths, etc.)
# Fill these in before running; all four are left empty here.
# ================================
csv_filename = ""  # path of the CSV file to write
model_path = ""  # path of the YOLO model file
video_path = ""  # path of the input video
output_image_dir = ""  # directory where the annotated images are saved

# ================================
# Validate the inputs and prepare the output directory
# ================================
# Stop with a clear error before any further work if an input is missing.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"モデルファイルが見つかりません: {model_path}")

if not os.path.exists(video_path):
    raise FileNotFoundError(f"動画ファイルが見つかりません: {video_path}")

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_image_dir, exist_ok=True)

# CSV file name without directory and extension; it is used as the prefix
# of the saved image file names.
csv_base_filename = os.path.splitext(os.path.basename(csv_filename))[0]

# ================================
# Create the CSV file and write the header row
# ================================
header = [
    'date', 'No', 'Class', 'Label', 'Scores', 'id',
    'x1', 'y1', 'x2', 'y2', 'time',
]

# The file is (re)created on every run; newline='' is what the csv module
# expects so that it controls the line endings itself.
file = open(csv_filename, 'w', encoding='utf-8', newline='')
writer = csv.writer(file)
writer.writerow(header)

# ================================
# Load the YOLO model
# ================================
model = YOLO(model_path)

# ================================
# Open the input video
# ================================
cap = cv2.VideoCapture(video_path)

# An existing file can still fail to open (e.g. unsupported codec). Without
# this check the analysis loop would simply never run and leave a CSV that
# contains only the header row.
if not cap.isOpened():
    raise OSError(f"動画ファイルを開けません: {video_path}")

frame_cnt = 0  # number of frames read so far

# Back-dated by 10 seconds so that the 2-second interval check in the
# analysis loop already passes for the very first frame.
last_write_time = datetime.now() - timedelta(seconds=10)

# ================================
# Frame analysis loop
# ================================
# Every frame is displayed, but detection runs only when at least 2 seconds
# of wall-clock time (datetime.now(), not video time) have passed since the
# previous analysis. Press "q" in the preview window to stop early.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break  # no frame returned: end of the video or a read failure

        frame_cnt += 1
        cv2.imshow("YOLOv11 Inference", frame)

        if datetime.now() - last_write_time >= timedelta(seconds=2):
            # conf=0.5 drops low-confidence detections; classes restricts the
            # results to class IDs 0, 2 and 7 (their meaning depends on the
            # label map of the loaded model).
            results = model.track(frame, persist=True, conf=0.5, classes=[0, 2, 7])
            items = results[0]

            # Take ONE timestamp per analysed frame, before looking at the
            # detections. Previously it was assigned inside the per-item loop,
            # so a frame without detections hit an undefined (or stale)
            # timestamp_str when saving the image, and rows of the same frame
            # could carry a different second than the image file name.
            detection_time = datetime.now()
            date = detection_time.strftime('%Y/%m/%d')
            timestamp = detection_time.strftime('%H:%M:%S')
            timestamp_str = detection_time.strftime('%Y-%m-%d_%H-%M-%S')

            csv_rows = []
            annotated_frame = frame.copy()  # draw on a copy, keep the raw frame intact

            # Each item is expected to hold exactly one detection (int() on
            # boxes.cls below requires a single element).
            for item in items:
                cls = int(item.boxes.cls)  # class ID
                label = item.names[cls]  # class name
                score = item.boxes.conf.cpu().numpy()[0]  # confidence score
                x1, y1, x2, y2 = item.boxes.xyxy.cpu().numpy()[0]  # box corners

                # boxes.id is None when no track ID is available for the box.
                id_value = item.boxes.id
                track_ids = '' if id_value is None else id_value.int().cpu().tolist()[0]

                csv_rows.append([
                    date, str(frame_cnt), str(cls), str(label), str(score), str(track_ids),
                    str(x1), str(y1), str(x2), str(y2), timestamp,
                ])

                # Draw the bounding box and its "label: score" caption.
                cv2.rectangle(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                text = f"{label}: {score:.2f}"
                font_scale = 0.75
                cv2.putText(annotated_frame, text, (int(x1), int(y1) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0), 2)

            writer.writerows(csv_rows)
            # The image name carries the same timestamp as the CSV rows so
            # that the two can be matched afterwards.
            cv2.imwrite(os.path.join(output_image_dir, f"{csv_base_filename}_{timestamp_str}.jpg"),
                        annotated_frame)
            last_write_time = detection_time

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the video, close the preview window and close the CSV
    # file, even when an exception interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()
    file.close()

動画を解析するだけのプログラム（参考）

解析保存動画のスクリーンショット

プログラム（コメントはChatGPTによる自動作成）

movie_ver.py

import cv2
from ultralytics import YOLO

# ================================
# Settings (video file paths, etc.)
# Fill these in before running; all three are left empty here.
# ================================
video_path = ""  # video file to process
model_path = ""  # path of the YOLO model file
output_path = ""  # where the annotated output video is saved

# ================================
# Load the YOLO model
# ================================
model = YOLO(model_path)

# ================================
# Open the input video
# ================================
cap = cv2.VideoCapture(video_path)

# Abort if the video could not be opened. SystemExit with a message prints
# it to stderr and ends the script with a non-zero exit status, instead of
# the interactive-shell helper exit(), which reported success (status 0).
if not cap.isOpened():
    raise SystemExit("Error: Could not open video.")

# ================================
# Output video settings
# ================================
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec used for the MP4 output

# Write the output at the frame rate of the source so it plays at the same
# speed; fall back to 30 fps when the source does not report a usable value
# ("not fps > 0" also catches NaN).
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

# The output frame size must match the frames that are written, so it is
# taken from the source video.
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

# ================================
# Frame analysis loop
# ================================
# Runs detection on every frame, writes the annotated frame to the output
# video and shows it in a preview window. Press "q" in the window to stop.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # no frame returned: end of the video or a read failure

        # Run object detection with the loaded YOLO model.
        #   conf:    confidence threshold (0.5)
        #   iou:     IoU threshold (0.45)
        #   device:  'cpu', or 'cuda' when a GPU is used
        #   verbose: False suppresses the detailed per-frame log
        results = model.predict(frame, conf=0.5, iou=0.45, device='cpu', verbose=False)

        # Draw the detections onto the frame.
        annotated_frame = results[0].plot()

        out.write(annotated_frame)
        cv2.imshow('Frame', annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # ================================
    # Release resources
    # ================================
    # Done in a finally block so that the capture and the writer are released
    # even when an exception (including Ctrl+C) interrupts the loop.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

機械学習、roboflow利用方法に関しては後日作成します。

roboflow

画像、引用
ANA
HANEDA,Tokyo International Airport Terminal2【LIVE】ANN/テレ朝
 Ultralytics
yolov8を用いてmp4動画ファイルの物体検知をする

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up