YOLOとPythonで始める物体検出入門

Posted at 2024-08-19

1. YOLOとは

YOLOは"You Only Look Once"の略で、高速で精度の高い物体検出アルゴリズムです。1回の推論で複数の物体を検出できる効率的な手法として知られています。

# Pseudocode illustrating the basic idea behind YOLO
def yolo_concept(image):
    """Sketch the YOLO pipeline: split into a grid, predict per cell, then suppress.

    The helper functions called here are placeholders that are not defined in
    this article; the function only illustrates the order of the steps.

    Args:
        image: The input image handed to ``divide_image_into_grid``.

    Returns:
        Whatever ``apply_non_max_suppression`` returns for the predictions
        collected from all grid cells.
    """
    # Collect the predictions of EVERY cell; keeping only the loop variables
    # would pass just the last cell's predictions to non-max suppression.
    bounding_boxes = []
    class_probabilities = []
    for cell in divide_image_into_grid(image):
        bounding_boxes.append(predict_bounding_boxes(cell))
        class_probabilities.append(predict_class_probabilities(cell))

    final_detections = apply_non_max_suppression(bounding_boxes, class_probabilities)
    return final_detections

# 使用例
image = load_image("example.jpg")
detections = yolo_concept(image)

2. 環境構築

YOLOを使用するために必要なライブラリをインストールします。

!pip install ultralytics opencv-python numpy matplotlib

3. YOLOモデルのロード

事前学習済みのYOLOv5モデルをロードします。

from ultralytics import YOLO

def load_yolo_model(model_path='yolov5s.pt'):
    """Create a ``YOLO`` object from a weights file.

    Args:
        model_path: Weights file passed straight to the ``YOLO`` constructor.

    Returns:
        The constructed ``YOLO`` instance.
    """
    return YOLO(model_path)

# 使用例
model = load_yolo_model()
print(f"モデルがロードされました: {model}")

4. 画像での物体検出

1枚の画像で物体検出を行います。

def detect_objects_in_image(model, image_path):
    """Run ``model`` on a single image and return its output unchanged.

    Args:
        model: A callable model; it is invoked with ``image_path``.
        image_path: The image location handed to the model.

    Returns:
        Whatever the model call returns.
    """
    return model(image_path)

# 使用例
image_path = 'path/to/image.jpg'
results = detect_objects_in_image(model, image_path)
print(f"検出された物体: {len(results[0].boxes)} 個")

5. 検出結果の可視化

検出結果を画像上に描画します。

import cv2
import numpy as np

def visualize_results(image_path, results):
    """Draw every detected box with its class id on the image and display it.

    Args:
        image_path: Path of the image file to load and annotate.
        results: Iterable of result objects. Each must expose ``boxes``, whose
            items provide ``xyxy`` (corner coordinates) and ``cls`` (class id).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an opaque OpenCV error.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    green = (0, 255, 0)
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            cv2.rectangle(img, (x1, y1), (x2, y2), green, 2)
            # Label sits slightly above the top-left corner of the box
            cv2.putText(img, f"{box.cls[0]:.0f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, green, 2)

    cv2.imshow('Result', img)
    cv2.waitKey(0)  # block until a key is pressed
    cv2.destroyAllWindows()

# 使用例
visualize_results(image_path, results)

6. ビデオでの物体検出

ビデオストリームでリアルタイム物体検出を行います。

def detect_video(model, video_source=0):
    """Run detection frame by frame on a video source and show the boxes live.

    The loop ends when no more frames can be read or when ``q`` is pressed.

    Args:
        model: A callable model invoked with each frame; its results must
            expose ``boxes`` whose items provide ``xyxy``.
        video_source: Value passed to ``cv2.VideoCapture`` (a device index or
            a file path; 0 is conventionally the default camera).
    """
    cap = cv2.VideoCapture(video_source)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)

            for r in results:
                for box in r.boxes:
                    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cv2.imshow('Video', frame)
            # Mask to the low byte: some platforms set higher bits in the key code
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close windows even if inference or drawing raises
        cap.release()
        cv2.destroyAllWindows()

# 使用例
detect_video(model)

7. カスタムデータセットの準備

自前のデータセットで学習する準備をします。

import os
import shutil

def prepare_custom_dataset(dataset_path, output_path):
    """Create the ``train``/``val`` directory skeleton for a custom dataset.

    Under ``output_path`` this creates ``<split>/images`` and
    ``<split>/labels`` for both splits. Existing directories are left alone.
    ``dataset_path`` is not used yet because the copy step is unimplemented.

    Args:
        dataset_path: Location of the raw dataset (currently unused).
        output_path: Root directory in which the skeleton is created.
    """
    os.makedirs(output_path, exist_ok=True)
    leaf_dirs = [
        os.path.join(output_path, split, kind)
        for split in ('train', 'val')
        for kind in ('images', 'labels')
    ]
    for leaf in leaf_dirs:
        os.makedirs(leaf, exist_ok=True)

    # Splitting the dataset and copying the files still has to be implemented here
    # e.g. copy each image and label file into the matching directory

# 使用例
prepare_custom_dataset('path/to/raw_dataset', 'path/to/prepared_dataset')

8. データ拡張

学習データを増やすためのデータ拡張テクニックを適用します。

from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast

def augment_data(image):
    """Apply a random flip / brightness-contrast pipeline to one image.

    Args:
        image: The image passed to the pipeline as its ``image`` argument.

    Returns:
        The ``'image'`` entry of the pipeline's output.
    """
    pipeline = Compose([HorizontalFlip(p=0.5), RandomBrightnessContrast(p=0.2)])
    return pipeline(image=image)['image']

# 使用例
# original_image = cv2.imread('path/to/image.jpg')
# augmented_image = augment_data(original_image)

9. YOLOモデルの学習

カスタムデータセットでYOLOモデルを学習させます。

def train_yolo(data_yaml, epochs, img_size):
    """Train a model started from ``yolov5s.pt`` on a custom dataset.

    Args:
        data_yaml: Dataset description passed as ``data`` to ``train``.
        epochs: Number of epochs passed to ``train``.
        img_size: Image size passed as ``imgsz`` to ``train``.

    Returns:
        A ``(model, results)`` tuple: the ``YOLO`` object and the value
        returned by its ``train`` call.
    """
    yolo = YOLO('yolov5s.pt')
    train_kwargs = {'data': data_yaml, 'epochs': epochs, 'imgsz': img_size}
    outcome = yolo.train(**train_kwargs)
    return yolo, outcome

# Usage example
epochs = 100
model, results = train_yolo('custom.yaml', epochs, 640)
# The value returned by model.train() is a metrics object, not a training
# state, so it offers no `epoch` attribute; report the requested epoch count.
print(f"学習が完了しました。エポック数: {epochs}")

10. モデルの評価

学習したモデルの性能を評価します。

def evaluate_model(model, data_yaml):
    """Validate ``model`` on a dataset and print its box mAP figures.

    Args:
        model: Object whose ``val(data=...)`` method runs the validation.
        data_yaml: Dataset description passed as ``data`` to ``val``.

    Returns:
        The object returned by ``model.val``; its ``box.map`` and
        ``box.map50`` values are printed with three decimals.
    """
    metrics = model.val(data=data_yaml)
    box_metrics = metrics.box
    print(f"mAP値: {box_metrics.map:.3f}")
    print(f"mAP@0.5: {box_metrics.map50:.3f}")
    return metrics

# 使用例
eval_results = evaluate_model(model, 'custom.yaml')

11. 推論の高速化

ONNXやTensorRTなどの形式にモデルをエクスポートすることで、推論を高速化できます。ここではONNXへのエクスポート例を示します。

def export_onnx(model, img_size):
    """Export ``model`` to ONNX and return what the export call reports.

    Args:
        model: Object whose ``export`` method performs the conversion.
        img_size: Image size passed as ``imgsz`` to ``export``.

    Returns:
        The value returned by ``model.export`` (for Ultralytics models this is
        expected to be the path of the written file), so the caller can locate
        the exported model instead of guessing its path.
    """
    return model.export(format='onnx', imgsz=img_size)

# ONNXモデルの使用例
import onnxruntime as ort

def inference_onnx(onnx_path, image_path):
    """Open an ONNX Runtime session for ``onnx_path``.

    This is a stub: only the session is created. ``image_path`` is not used
    yet and nothing is returned.

    Args:
        onnx_path: Path of the ONNX model given to ``ort.InferenceSession``.
        image_path: Image to run inference on (currently unused).
    """
    ort_session = ort.InferenceSession(onnx_path)
    # Image preprocessing and the actual ONNX inference still have to be implemented here

# 使用例
export_onnx(model, 640)
# inference_onnx('path/to/model.onnx', 'path/to/image.jpg')

12. マルチGPU学習

複数のGPUを使って学習を高速化します。

def multi_gpu_train(data_yaml, epochs, img_size, devices=(0, 1)):
    """Train a model started from ``yolov5s.pt`` on several GPUs.

    Args:
        data_yaml: Dataset description passed as ``data`` to ``train``.
        epochs: Number of epochs passed to ``train``.
        img_size: Image size passed as ``imgsz`` to ``train``.
        devices: GPU indices to train on. Defaults to GPUs 0 and 1, which
            matches the previously hard-coded behaviour.

    Returns:
        A ``(model, results)`` tuple: the ``YOLO`` object and the value
        returned by its ``train`` call.
    """
    model = YOLO('yolov5s.pt')
    results = model.train(
        data=data_yaml,
        epochs=epochs,
        imgsz=img_size,
        device=list(devices),  # handed over as a list, as the original call did
    )
    return model, results

# 使用例
# model, results = multi_gpu_train('custom.yaml', 100, 640)

13. YOLOの応用例

YOLOは交通監視や医療画像分析など、さまざまな分野に応用できます。ここでは交通監視の例を紹介します。

def traffic_monitoring(video_path, model):
    """Show a video with the number of vehicles detected in each frame.

    The count is computed per frame. Accumulating it over the whole video
    would count the same vehicle again in every frame in which it is visible.
    The loop ends when no more frames can be read or when ``q`` is pressed.

    Args:
        video_path: Value passed to ``cv2.VideoCapture``.
        model: A callable model invoked with each frame; its results must
            expose ``boxes`` whose items provide ``cls``.
    """
    # Class ids for car, motorcycle, bus and truck
    vehicle_class_ids = {2, 3, 5, 7}

    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)

            # int() turns the class value into a plain int so set membership
            # compares by value
            vehicle_count = sum(
                1
                for r in results
                for box in r.boxes
                if int(box.cls[0]) in vehicle_class_ids
            )

            cv2.putText(frame, f"Vehicles: {vehicle_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Traffic Monitoring', frame)

            # Mask to the low byte: some platforms set higher bits in the key code
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close windows even if inference or drawing raises
        cap.release()
        cv2.destroyAllWindows()

# 使用例
# traffic_monitoring('path/to/traffic_video.mp4', model)

14. YOLOの最新バージョン

YOLOv8などの新しいバージョンも同じAPIで利用できます。ここではYOLOv5とYOLOv8で同じ画像を処理し、検出数を比較します。

from ultralytics import YOLO

def compare_yolo_versions(image='path/to/image.jpg'):
    """Run YOLOv5 and YOLOv8 on the same image and print their detection counts.

    Args:
        image: Image handed to both models. Defaults to the placeholder path
            that was previously hard-coded.
    """
    yolov5 = YOLO('yolov5s.pt')
    yolov8 = YOLO('yolov8s.pt')

    results_v5 = yolov5(image)
    results_v8 = yolov8(image)

    # Only the first result is inspected, since a single image is passed in
    print(f"YOLOv5 検出数: {len(results_v5[0].boxes)}")
    print(f"YOLOv8 検出数: {len(results_v8[0].boxes)}")

# 使用例
compare_yolo_versions()

15. まとめと今後の展望

YOLOの進化と今後の可能性について考察します。

def future_yolo():
    """Print a heading followed by five numbered points on YOLO's outlook."""
    outlook = (
        "YOLOの今後の展望:",
        "1. より高速・高精度なバージョンの開発",
        "2. 3D物体検出への応用",
        "3. 自己教師あり学習の統合",
        "4. エッジデバイスでの効率的な実行",
        "5. マルチモーダル学習との統合",
    )
    for line in outlook:
        print(line)

# 使用例
future_yolo()

以上、PythonでYOLOを使った物体検出の基本から応用までを15章に分けて、各章にコードを含めて解説しました。これらのコードを基に、実際に手を動かしながらYOLOの理解を深めていくことができます。YOLOは日々進化を続けており、コンピュータビジョンの分野で重要な役割を果たし続けることが期待されます。

この記事を通じて、YOLOの基本概念から実践的な応用まで幅広く学ぶことができます。今後も技術の進化に注目しながら、自身のプロジェクトにYOLOを活用していくことをお勧めします。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up