Gradioで作る画像アノテーション＆クロップツール - matplotlib生成画像でアノテーション作成を試す

Last updated at 2025-07-13 / Posted at 2025-07-13

はじめに

機械学習用のデータセット作成や画像解析において、画像にアノテーション（注釈）を付ける作業は重要な工程です。今回は、Gradioとgradio-image-annotationを使って、直感的に操作できる画像アノテーション＆クロップツールを作成します。

matplotlibでシンプルな図形画像を生成し、そこにアノテーションを追加してみます。筆者はGoogle Colabで実装・動作確認しました（Colabにはmatplotlibがプリインストールされています。ローカル環境で試す場合は、matplotlibも別途インストールしてください）。

必要なライブラリのインストール

!pip install gradio gradio-image-annotation numpy

実装の概要

今回作成するツールには、次の2つの機能があります。

オブジェクトアノテーション機能：画像上の円・四角形・三角形などの図形を囲むバウンディングボックスを描き、ラベルを付けることができます。
クロップ機能：画像の特定の部分を切り取ることができます。

完成イメージ

実装例

import gradio as gr
from gradio_image_annotation import image_annotator
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from io import BytesIO

def create_sample_image():
    """Render the three-shape demo image with matplotlib and return it as RGB.

    A red circle, a blue rectangle and a green triangle are drawn on a light
    grey background. The figure is 8 x 6 inches at 100 dpi and the axes
    limits are 0..800 / 0..600 with the Y axis inverted; the subplot margins
    are removed so that the axes cover the whole canvas and one data unit is
    intended to correspond to one pixel, origin at the top-left. Without
    that, bounding boxes expressed in the same numbers as the shapes would
    not line up with the rendered pixels.

    Returns:
        numpy.ndarray: ``(height, width, 3)`` ``uint8`` RGB array (the alpha
        channel of the canvas buffer is dropped).
    """
    fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=100)
    # Stretch the axes over the full figure; the default margins would
    # offset and scale the drawing relative to pixel coordinates.
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    ax.set_xlim(0, 800)
    ax.set_ylim(0, 600)
    ax.set_aspect('equal')

    # Background colour
    ax.set_facecolor('#f0f0f0')

    # Red circle - left
    circle = patches.Circle((200, 300), 100, facecolor='red', edgecolor='darkred', linewidth=4)
    ax.add_patch(circle)

    # Blue rectangle - centre
    rectangle = patches.Rectangle((350, 250), 150, 100, facecolor='blue', edgecolor='darkblue', linewidth=4)
    ax.add_patch(rectangle)

    # Green triangle - right
    triangle = patches.Polygon([(600, 350), (700, 350), (650, 250)], facecolor='green', edgecolor='darkgreen', linewidth=4)
    ax.add_patch(triangle)

    # Hide the ticks and flip Y so the origin is at the top, as in images
    ax.set_xticks([])
    ax.set_yticks([])
    ax.invert_yaxis()

    # Render, then read the canvas pixels back as an RGBA array
    fig.canvas.draw()
    width, height = fig.canvas.get_width_height()
    buf = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
    buf = buf.reshape(height, width, 4)  # RGBA layout

    # Drop alpha and copy, so the result is a standalone contiguous array
    # rather than a strided view into the canvas buffer.
    buf_rgb = buf[:, :, :3].copy()

    # Close this specific figure (plt.close() alone targets whatever figure
    # happens to be current).
    plt.close(fig)

    return buf_rgb

def create_crop_image():
    """Render the demo image used on the crop tab and return it as RGB.

    A filled contour plot of ``sin(X/100) + cos(Y/80)`` is used as the
    background and a large orange circle is drawn at the centre. The figure
    is 6 x 4 inches with ``dpi`` pinned to 100 and the subplot margins
    removed, so the canvas is intended to be 600 x 400 pixels with one data
    unit per pixel regardless of the backend's default dpi. Without the
    explicit dpi the pixel size follows ``rcParams`` and a box given in
    0..600 / 0..400 coordinates may fall outside the image.

    Returns:
        numpy.ndarray: ``(height, width, 3)`` ``uint8`` RGB array (the alpha
        channel of the canvas buffer is dropped).
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 4), dpi=100)
    # Stretch the axes over the full figure so data and pixel coordinates agree.
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    ax.set_xlim(0, 600)
    ax.set_ylim(0, 400)
    ax.set_aspect('equal')

    # Gradient-like background
    x = np.linspace(0, 600, 100)
    y = np.linspace(0, 400, 100)
    X, Y = np.meshgrid(x, y)
    Z = np.sin(X/100) + np.cos(Y/80)
    ax.contourf(X, Y, Z, levels=20, cmap='viridis', alpha=0.7)

    # Large circle in the centre
    circle = patches.Circle((300, 200), 100, facecolor='orange', edgecolor='red', linewidth=5, alpha=0.8)
    ax.add_patch(circle)

    # Hide the ticks and flip Y so the origin is at the top, as in images
    ax.set_xticks([])
    ax.set_yticks([])
    ax.invert_yaxis()

    # Render, then read the canvas pixels back as an RGBA array
    fig.canvas.draw()
    width, height = fig.canvas.get_width_height()
    buf = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
    buf = buf.reshape(height, width, 4)  # RGBA layout

    # Drop alpha and copy, so the result is a standalone contiguous array
    # rather than a strided view into the canvas buffer.
    buf_rgb = buf[:, :, :3].copy()

    # Close this specific figure (plt.close() alone targets whatever figure
    # happens to be current).
    plt.close(fig)

    return buf_rgb

# Render both demo images once, at import time
sample_image_array = create_sample_image()
crop_image_array = create_crop_image()

# Initial value of the object-annotation tab: the image plus one labelled box
# per shape. Each row is (xmin, ymin, xmax, ymax, label, RGB colour).
# NOTE(review): create_sample_image draws the circle centred at (200, 300)
# with radius 100, i.e. spanning 100..300 x 200..400 in data units, so the
# "Circle" box below sits 10 units inside it - confirm this is intended.
example_annotation = {
    "image": sample_image_array,
    "boxes": [
        {
            "xmin": left,
            "ymin": top,
            "xmax": right,
            "ymax": bottom,
            "label": name,
            "color": rgb,
        }
        for left, top, right, bottom, name, rgb in (
            (110, 210, 290, 390, "Circle", (255, 0, 0)),
            (350, 250, 500, 350, "Rectangle", (0, 0, 255)),
            (580, 250, 720, 350, "Triangle", (0, 255, 0)),
        )
    ],
}

# Example(s) for the crop tab: the crop image with a single unlabelled box
examples_crop = [
    {
        "image": crop_image_array,
        "boxes": [
            {"xmin": 150, "ymin": 50, "xmax": 450, "ymax": 350, "color": (100, 200, 255)},
        ],
    },
]

def crop(annotations):
    """Return the part of the annotated image inside its first bounding box.

    Args:
        annotations: Mapping with an ``"image"`` array and a ``"boxes"`` list
            of dicts carrying ``xmin``/``ymin``/``xmax``/``ymax``. An optional
            truthy ``"orientation"`` value is passed to ``np.rot90`` as
            ``k=-orientation`` before cropping. ``None`` is accepted.

    Returns:
        The cropped array, or ``None`` when there is no annotation, no image,
        no box, or the box does not overlap the image.
    """
    if not annotations:
        return None

    image = annotations.get("image")
    if image is None:
        return None

    # Rotate a local reference instead of writing back into the caller's dict.
    if angle := annotations.get("orientation", None):
        image = np.rot90(image, k=-angle)

    boxes = annotations.get("boxes")
    if not boxes:
        return None
    box = boxes[0]

    # Clamp to the image bounds: a negative coordinate would otherwise be
    # interpreted by slicing as an offset from the far edge.
    height, width = image.shape[:2]
    left = min(max(int(round(box["xmin"])), 0), width)
    right = min(max(int(round(box["xmax"])), 0), width)
    top = min(max(int(round(box["ymin"])), 0), height)
    bottom = min(max(int(round(box["ymax"])), 0), height)

    # Nothing left to show; signal "no image" rather than a zero-size array.
    if right <= left or bottom <= top:
        return None

    return image[top:bottom, left:right]

def get_boxes_json(annotations):
    """Return the list of bounding boxes held by an annotation value.

    Args:
        annotations: Mapping with a ``"boxes"`` entry, or ``None``.

    Returns:
        The ``"boxes"`` list as stored in ``annotations``; an empty list when
        ``annotations`` is ``None``/empty or has no ``"boxes"`` entry.
    """
    if not annotations:
        return []
    boxes = annotations.get("boxes")
    return [] if boxes is None else boxes

# Build the Gradio interface: two tabs (object annotation, crop) plus a
# collapsible list of keyboard shortcuts.
with gr.Blocks() as demo:
    gr.Markdown("# 図形画像でのアノテーション作成デモ")
    gr.Markdown("matplotlibで生成した図形画像を使って、アノテーション作成とクロップ機能を体験できます。")
    
    # Object annotation tab
    with gr.Tab("Object annotation", id="tab_object_annotation"):
        gr.Markdown("### 3つの図形にアノテーションが設定されています")
        # Annotator pre-loaded with example_annotation; label_list and
        # label_colors are given in the same order.
        annotator = image_annotator(
            example_annotation,
            label_list=["Circle", "Rectangle", "Triangle", "Other"],
            label_colors=[(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 128, 0)],
            image_type="numpy",
        )
        button_get = gr.Button("Get bounding boxes")
        json_boxes = gr.JSON()
        # On click: feed the annotator value to get_boxes_json and show the
        # result in the JSON component.
        button_get.click(get_boxes_json, annotator, json_boxes)
    
    # Crop tab
    with gr.Tab("Crop", id="tab_crop"):
        gr.Markdown("### 画像の一部を切り取ってみましょう")
        with gr.Row():
            # NOTE(review): disable_edit_boxes / single_box presumably turn
            # off the label editing dialog and limit the annotator to one
            # box - confirm against the gradio_image_annotation docs.
            annotator_crop = image_annotator(
                examples_crop[0],
                image_type="numpy",
                disable_edit_boxes=True,
                single_box=True,
            )
            image_crop = gr.Image()
        button_crop = gr.Button("Crop")
        # On click: feed the annotator value to crop and show the result
        # in the image component.
        button_crop.click(crop, annotator_crop, image_crop)
        gr.Examples(examples_crop, annotator_crop)
    
    # Keyboard shortcuts
    with gr.Accordion("Keyboard Shortcuts"):
        gr.Markdown("""
        - **C**: Create mode
        - **D**: Drag mode
        - **E**: Edit selected box (same as double-click a box)
        - **Delete**: Remove selected box
        - **Space**: Reset view (zoom/pan)
        - **Enter**: Confirm modal dialog
        - **Escape**: Cancel/close modal dialog
        """)

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()

画像生成について

1. matplotlib による図形画像生成

このツールでは、matplotlibを使って図形を描画した画像を生成し、そのままnumpy配列としてGradioに渡しています。生成される画像には、赤い円（左側、中心が(200, 300)、半径100）、青い四角形（中央、左上の位置が(350, 250)、サイズ150×100）、緑の三角形（右側、頂点が(600, 350)、(700, 350)、(650, 250)）の3つが含まれます。クロップ用の画像では、sin/cos関数で描かれたグラデーション背景の上に、中央に大きなオレンジの円が描かれています。

2. numpy配列への変換

# Render the figure so the canvas pixel buffer is populated
fig.canvas.draw()

# Read the pixels through the current matplotlib API (buffer_rgba)
width, height = fig.canvas.get_width_height()
buf = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
buf = buf.reshape(height, width, 4)  # RGBA layout

# Convert to RGB by dropping the alpha channel
buf_rgb = buf[:, :, :3]

新しいmatplotlibでは、従来使われていたtostring_rgb()が非推奨（3.8）を経て削除（3.10）されたため、buffer_rgba()メソッドを使用します。取得したRGBA形式のデータからアルファチャンネルを除いてRGBに変換し、Gradioに渡します。

3. 座標系の調整

ax.invert_yaxis()  # Flip the Y axis to match the image coordinate system

matplotlib の座標系（下が原点）を画像座標系（上が原点）に合わせるため、Y軸を反転しています。

実際の動作確認

1. オブジェクトアノテーション機能の試し方

コードを実行すると、最初のタブに3つの図形が描画された画像が表示されます
各図形に対応するアノテーションが既に設定されています：
- 赤い円（赤色のボックス）: 左側に配置
- 青い四角形（青色のボックス）: 中央に配置
- 緑の三角形（緑色のボックス）: 右側に配置
「Get bounding boxes」ボタンを押すと、以下のようなJSON形式でデータが出力されます：

[
  {
    "xmin": 110,
    "ymin": 210,
    "xmax": 290,
    "ymax": 390,
    "label": "Circle",
    "color": [255, 0, 0]
  },
  {
    "xmin": 350,
    "ymin": 250,
    "xmax": 500,
    "ymax": 350,
    "label": "Rectangle",
    "color": [0, 0, 255]
  },
  {
    "xmin": 580,
    "ymin": 250,
    "xmax": 720,
    "ymax": 350,
    "label": "Triangle",
    "color": [0, 255, 0]
  }
]

2. クロップ機能の試し方

「Crop」タブに切り替えると、グラデーション背景にオレンジの円が描画された画像が表示されます
水色のボックスが事前に設定されており、中央のオレンジの円を囲んでいます
「Crop」ボタンを押すと、ボックス内の領域が切り取られて右側に表示されます

3. キーボードショートカット

アノテーション作業を効率化するため、以下のキーボードショートカットが利用できます：

C: 新しいバウンディングボックスの作成モード
D: ドラッグモード（パンとズーム）
E: 選択されたボックスの編集（ダブルクリックと同じ効果）
Delete: 選択されたボックスの削除
Space: ビューのリセット（ズームとパンの初期化）
Enter: モーダルダイアログの確定
Escape: モーダルダイアログのキャンセル

実際の活用シーン

このツールは、シンプルなアプリとして気軽に試してみるのに適しています。アノテーション作業や座標の扱い方、UIの操作などを体験するのにちょうどよく、学習や検証の入り口として使いやすい構成になっています。

まとめ

このツールは、複雑なアノテーション作業に取り組む前の練習用としてぴったりです。ぜひ、自分のニーズに合わせて使いながら、アノテーションの理解を深めてみてください。

参考リンク

Gradio公式ドキュメント

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up