【Python】Mask R-CNN/mask2former対応！マスク画像からMS-COCO形式のアノテーションJsonを自動生成

Last updated at 2025-02-10 / Posted at 2025-02-10

こんにちは。柚子柿です。
今日は、Mask R-CNNやmask2formerをFine-Tuning（ファインチューニング）するために必要な、MS-COCO形式のJsonファイルを、ターゲットをマスキングしたバイナリ画像から自動作成するコードを紹介します。

import os
import json
import cv2
import shutil
import numpy as np
from pycocotools import mask as maskUtils
from sklearn.model_selection import train_test_split

def get_segmentation_points(mask):
    """Return the outer contours of ``mask`` as flat coordinate lists.

    The mask is passed to ``cv2.findContours`` with ``RETR_EXTERNAL`` (outer
    contours only) and ``CHAIN_APPROX_SIMPLE``. Each contour is flattened into
    a plain Python list of numbers (x and y interleaved, as produced by
    OpenCV), which is the polygon layout used in COCO ``segmentation`` fields.

    Contours whose flattened list has 4 or fewer values are discarded: the
    values come in x, y pairs, so a kept contour has at least 3 points, the
    minimum for a polygon.

    Args:
        mask: Single-channel image array accepted by ``cv2.findContours``
            (non-zero pixels are treated as foreground by OpenCV).

    Returns:
        A list of flat coordinate lists, one per retained contour. Empty when
        no contour is large enough.
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    polygons = [outline.flatten().tolist() for outline in contours]
    # More than 4 numbers == at least 3 (x, y) points.
    return [polygon for polygon in polygons if len(polygon) > 4]

def create_coco_annotation(images_dir, mask_dir, output_file):
    """Build an MS-COCO style instance-annotation JSON from per-class mask images.

    Every sub-directory of ``mask_dir`` (except ``"coco"`` and ``"images"``)
    is treated as one category. For each ``*.jpg`` file in ``images_dir`` the
    function looks for ``<mask_dir>/<class>/<image stem>.png``; when it
    exists, each connected foreground region of that mask becomes one
    annotation with its own polygon, bounding box and pixel area.

    Differences from a naive per-mask export:
      * each annotation carries only the polygon(s) of its own connected
        component, not the contours of the whole mask;
      * an image that has masks in several classes is listed once in
        ``"images"``;
      * directory listings are sorted so category ids and image ids are
        deterministic for a given set of file names;
      * unreadable mask files are reported and skipped instead of crashing.

    Args:
        images_dir: Directory containing the ``.jpg`` images of this subset.
        mask_dir: Directory containing one sub-directory of ``.png`` masks
            per class.
        output_file: Path of the JSON file to write.

    Returns:
        None. The result is written to ``output_file``.
    """
    annotations = []
    images_by_id = {}
    categories = []

    # os.listdir() returns entries in arbitrary order; sort so ids are stable.
    image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))
    class_names = sorted(
        name for name in os.listdir(mask_dir)
        # Ignore the "coco" (output) and "images" (source images) folders.
        if name not in ("coco", "images")
        and os.path.isdir(os.path.join(mask_dir, name))
    )

    for category_id, class_name in enumerate(class_names, start=1):
        class_dir = os.path.join(mask_dir, class_name)
        categories.append({
            "supercategory": "none",
            "id": int(category_id),
            "name": class_name
        })

        for image_id, image_file in enumerate(image_files):
            image_stem = os.path.splitext(image_file)[0]
            mask_file = os.path.join(class_dir, image_stem + '.png')
            if not os.path.exists(mask_file):
                continue

            mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                # cv2.imread returns None for files it cannot decode.
                print(f"Warning: could not read mask file, skipped: {mask_file}")
                continue
            # NOTE(review): the size is taken from the mask, which assumes the
            # mask has the same resolution as the source image — confirm.
            height, width = mask.shape

            # COCO image entry; register each image only once even when it
            # has masks for several classes.
            if image_id not in images_by_id:
                images_by_id[image_id] = {
                    "license": 1,
                    "file_name": image_file,
                    "coco_url":"dummy/COCO/images/URL",
                    "height": int(height),
                    "width": int(width),
                    "date_captured":"2024/06/26",
                    "flickr_url":"dummy/flickr/images/URL",
                    "id": int(image_id)
                }

            # One annotation per connected foreground component.
            num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask)
            for label in range(1, num_labels):  # label 0 is the background
                x, y, w, h, area = stats[label]

                # Extract polygons from this component only, so instances do
                # not share each other's outlines.
                component = (labels == label).astype(np.uint8)
                segmentation = get_segmentation_points(component)
                if not segmentation:
                    # Component too small to yield a valid polygon.
                    continue

                annotations.append({
                    "segmentation": segmentation,
                    "area": int(area),
                    "iscrowd": 0,
                    "image_id": int(image_id),
                    "bbox": [int(x), int(y), int(w), int(h)],
                    "category_id": int(category_id),
                    "id": int(len(annotations) + 1)
                })

    images = [images_by_id[key] for key in sorted(images_by_id)]

    coco_format = {
        "info": {
            "description": "COCO 2017 Dataset",
            "url": "http://cocodataset.org",
            "version": "1.0",
            "year": 2017,
            "contributor": "COCO Consortium",
            "date_created": "2017/09/01"
        },
        "licenses": [
            {
                "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
                "id": 1,
                "name": "Attribution-NonCommercial-ShareAlike License"
            },
            {
                "url": "http://creativecommons.org/licenses/by-nc/2.0/",
                "id": 2,
                "name": "Attribution-NonCommercial License"
            },
            {
                "url": "http://creativecommons.org/licenses/by-nc-nd/2.0/",
                "id": 3,
                "name": "Attribution-NonCommercial-NoDerivs License"
            },
            {
                "url": "http://creativecommons.org/licenses/by/2.0/",
                "id": 4,
                "name": "Attribution License"
            },
            {
                "url": "http://creativecommons.org/licenses/by-sa/2.0/",
                "id": 5,
                "name": "Attribution-ShareAlike License"
            },
            {
                "url": "http://creativecommons.org/licenses/by-nd/2.0/",
                "id": 6,
                "name": "Attribution-NoDerivs License"
            },
            {
                "url": "http://flickr.com/commons/usage/",
                "id": 7,
                "name": "No known copyright restrictions"
            },
            {
                "url": "http://www.usa.gov/copyright.shtml",
                "id": 8,
                "name": "United States Government Work"
            }
        ],
        "images": images,
        "annotations": annotations,
        "categories": categories
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(coco_format, f, indent=4)

def split_dataset(images_dir, train_dir, val_dir, split_ratio=0.7):
    """Copy the ``.jpg`` files of ``images_dir`` into train and validation folders.

    The file list is split with ``train_test_split`` using a fixed
    ``random_state``. The list is sorted first because ``os.listdir`` returns
    entries in arbitrary order; without sorting, the fixed seed would not
    guarantee the same split for the same set of files.

    Args:
        images_dir: Directory holding the source ``.jpg`` images.
        train_dir: Destination directory for the training subset (created if
            missing).
        val_dir: Destination directory for the validation subset (created if
            missing).
        split_ratio: Value passed to ``train_test_split`` as ``train_size``.

    Returns:
        None. Files are copied; the originals are left in place.
    """
    images = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))
    train_images, val_images = train_test_split(images, train_size=split_ratio, random_state=42)

    for target_dir, file_names in ((train_dir, train_images), (val_dir, val_images)):
        os.makedirs(target_dir, exist_ok=True)
        for file_name in file_names:
            shutil.copy(os.path.join(images_dir, file_name), os.path.join(target_dir, file_name))

if __name__ == "__main__":
    # Dataset root: holds "images" plus one mask folder per class.
    base_dir = "path/to/dataset"
    images_dir = os.path.join(base_dir, "images")

    # Output layout, created under <base_dir>/coco.
    coco_dir = os.path.join(base_dir, "coco")
    annotations_dir = os.path.join(coco_dir, "annotations")
    train_dir = os.path.join(coco_dir, "train2017")
    val_dir = os.path.join(coco_dir, "val2017")

    # Copy the images into train / val folders with a 7:3 split.
    split_dataset(images_dir, train_dir, val_dir, split_ratio=0.7)

    # Generate one annotation file per subset.
    os.makedirs(annotations_dir, exist_ok=True)
    for subset_dir, json_name in (
        (train_dir, "instances_train2017.json"),
        (val_dir, "instances_val2017.json"),
    ):
        create_coco_annotation(subset_dir, base_dir, os.path.join(annotations_dir, json_name))

    print(f"COCO形式のアノテーションJsonファイルが{annotations_dir}に保存されました。")

以上です！

使い方：
path/to/dataset（コード中のbase_dir）の直下に、下記のようにディレクトリ構造を作る（出力はpath/to/dataset/coco以下に自動生成されます）

---path/to/dataset
 |-images（ここに原画像を全て格納）
 |-classA（ここにclassAのバイナリ画像を格納）
 |-classB（ここにclassBのバイナリ画像を格納）
 …

各クラスのバイナリ画像は、存在する分だけ格納したらよいです。
例えば原画像はimage1.jpg、image2.jpg、image3.jpgの3つあるとします。
classAはimage1.jpg、image2.jpgに写っているとします。
classBはimage2.jpg、image3.jpgに写っているとします。
この場合、下記のような構成になります。

※原画像はjpg、バイナリ画像はpngで表現
---path/to/dataset
 |-images（image1.jpg、image2.jpg、image3.jpg）
 |-classA（image1.png、image2.png）
 |-classB（image2.png、image3.png）

このようにディレクトリ構造を設定して、プログラムを実行したら、MS-COCO形式のJsonファイルが自動作成されます！！

確認方法：
自動作成したアノテーションJsonファイルで正しくアノテーションできているかどうかは、下記のGitHubのコード（cocoviewer.py）を使えば確認できます。
👇

使い方

python cocoviewer.py -i my_dataset/images/train -a my_dataset_train.json

-i 画像ディレクトリ
-a jsonファイル
を指定

上手くアノテーションできていれば写真のようになります。

👆GitHubから引用

以上です！

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up