0. 概要
あらゆる最新のアルゴリズムの評価にCOCOのデータセットが用いられている。すなわち、学習も識別もCOCOフォーマットに最適化されている。自身の画像をCOCOフォーマットで作っておけば、サクッと入れ替えられるため便利である。
1. フォーマット
以下の全体フォーマットを基に作成していく。
今回はセグメンテーション用にフルモデルを作成する。
{
"info": {...},
"licenses": [...],
"images": [...],
"annotations": [...],
"categories": [...], <-- Not in Captions annotations
"segment_info": [...] <-- Only in Panoptic annotations
}
Info
あってもなくてもといった感じ。
"info": {
"description": "COCO 2017 Dataset",
"url": "http://cocodataset.org",
"version": "1.0",
"year": 2017,
"contributor": "COCO Consortium",
"date_created": "2017/09/01"
}
以下、Pythonコード
import json
import collections as cl
def info():
tmp = cl.OrderedDict()
tmp["description"] = "Test"
tmp["url"] = "https://test"
tmp["version"] = "0.01"
tmp["year"] = 2019
tmp["contributor"] = "xxxx"
tmp["data_created"] = "2019/09/10"
return tmp
def main():
query_list = ["info", "licenses", "images", "annotations", "categories", "segment_info"]
js = cl.OrderedDict()
for i in range(len(query_list)):
tmp = ""
# Info
if query_list[i] == "info":
tmp = info()
# save it
js[query_list[i]] = tmp
# write
fw = open('datasets.json','w')
json.dump(js,fw,indent=2)
if __name__=='__main__':
main()
Licenses
あってもなくてもといった感じ。images
で参照される。
"licenses": [
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
"id": 1,
"name": "Attribution-NonCommercial-ShareAlike License"
},
{
"url": "http://creativecommons.org/licenses/by-nc/2.0/",
"id": 2,
"name": "Attribution-NonCommercial License"
},
...
]
以下、Pythonコード
def licenses():
tmp = cl.OrderedDict()
tmp["id"] = 1
tmp["url"] = dummy_words
tmp["name"] = "administrater"
return tmp
images
基本的にid
はユニークで、後のAnnotation等に関連づけられる。
"images": [
{
"license": 4,
"file_name": "000000397133.jpg",
"coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg",
"height": 427,
"width": 640,
"date_captured": "2013-11-14 17:02:52",
"flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg",
"id": 397133
},
{
"license": 1,
"file_name": "000000037777.jpg",
"coco_url": "http://images.cocodataset.org/val2017/000000037777.jpg",
"height": 230,
"width": 352,
"date_captured": "2013-11-14 20:55:31",
"flickr_url": "http://farm9.staticflickr.com/8429/7839199426_f6d48aa585_z.jpg",
"id": 37777
},
...
]
以下、Pythonコード
def images():
tmps = []
for i in range(10):
tmp = cl.OrderedDict()
tmp["license"] = 0
tmp["id"] = i
tmp["file_name"] = str(i) + ".png"
tmp["width"] = "640"
tmp["height"] = "480"
tmp["date_captured"] = "2019-09-01 12:34:56"
tmp["coco_url"] = dummy_words
tmp["flickr_url"] = dummy_words
tmps.append(tmp)
return tmps
annotations
iscrowd=1
の場合は人などが沢山写っている領域全域をbboxでマスクし、segmentationエリアはcounts
とsize
が追加される。
Segmentationの並び順は[x1, y1, x2, y2...]
とポリゴンが続いていく。1つの対象物が切れる(自転車に跨り、自転車が不連続になる)場合は[x1, y1, ...] [x1, y1, ...]
。area
はピクセル数でピクセルをまたがるポリゴンを描く場合小数点も出てくる。
bbox
はx, y, width, height
である。基本的にはmin(poly_x), min(poly_y), max(poly_x) - min(poly_x), max(poly_y) - min(poly_y)
でpolygonから計算できる。
"annotations": [
{
"segmentation": [[510.66,423.01,511.72,420.03,...,510.45,423.01]],
"area": 702.1057499999998,
"iscrowd": 0,
"image_id": 289343,
"bbox": [473.07,395.93,38.65,28.67],
"category_id": 18,
"id": 1768
},
...
{
"segmentation": {
"counts": [179,27,392,41,…,55,20],
"size": [426,640]
},
"area": 220834,
"iscrowd": 1,
"image_id": 250282,
"bbox": [0,34,639,388],
"category_id": 1,
"id": 900100250282
}
]
以下、Pythonコード
def annotations():
tmps = []
for i in range(10):
tmp = cl.OrderedDict()
tmp_segmentation = cl.OrderedDict()
tmp_segmentation = [[10, 8]]
tmp["segmentation"] = tmp_segmentation
tmp["id"] = int(str(1000)+ str(i))
tmp["image_id"] = i
tmp["category_id"] = 62
tmp["area"] = 10
tmp["iscrowd"] = 0
tmp["bbox"] = [10, 10]
tmps.append(tmp)
return tmps
categories
supercategory
が大きなクラスの名前。name
がその配下。
もし、構成品がある場合は、keypoints
で指定、それらのつながりはskelton
で指定する。
"categories": [
{
"supercategory": "person",
"id": 1,
"name": "person",
"keypoints": [
"nose","left_eye","right_eye","left_ear","right_ear",
"left_shoulder","right_shoulder","left_elbow","right_elbow",
"left_wrist","right_wrist","left_hip","right_hip",
"left_knee","right_knee","left_ankle","right_ankle"
],
"skeleton": [
[16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13],[6,7],
[6,8],[7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]
]
}
]
以下、Pythonコード
def categories():
tmps = []
sup = ["animal", "pill"]
cat = ["dog", "allergy"]
for i in range(2):
tmp = cl.OrderedDict()
tmp["id"] = str(i)
tmp["supercategory"] = sup[i]
tmp["name"] = cat[i]
tmps.append(tmp)
return tmps
2. Visualize
Jupyterで以下のようなコードを書く。
基本は以下のコードを改変
https://gist.github.com/akTwelve/dc79fc8b9ae66828e7c7f648049bc42d#file-coco_image_viewer-ipynb
オンライン上から参照できる画像しか上手く表示できないのでBase64エンコーディングしてオフライン画像の表示も対応させた。
import IPython
import os
import json
import random
import numpy as np
import requests
from io import BytesIO
from math import trunc
from PIL import Image as PILImage
from PIL import ImageDraw as PILImageDraw
import base64
# Load the dataset json
class CocoDataset():
def __init__(self, annotation_path, image_dir):
self.annotation_path = annotation_path
self.image_dir = image_dir
self.colors = colors = ['blue', 'purple', 'red', 'green', 'orange', 'salmon', 'pink', 'gold',
'orchid', 'slateblue', 'limegreen', 'seagreen', 'darkgreen', 'olive',
'teal', 'aquamarine', 'steelblue', 'powderblue', 'dodgerblue', 'navy',
'magenta', 'sienna', 'maroon']
json_file = open(self.annotation_path)
self.coco = json.load(json_file)
json_file.close()
self.process_info()
self.process_licenses()
self.process_categories()
self.process_images()
self.process_segmentations()
def display_info(self):
print('Dataset Info:')
print('=============')
for key, item in self.info.items():
print(' {}: {}'.format(key, item))
requirements = [['description', str],
['url', str],
['version', str],
['year', int],
['contributor', str],
['date_created', str]]
for req, req_type in requirements:
if req not in self.info:
print('ERROR: {} is missing'.format(req))
elif type(self.info[req]) != req_type:
print('ERROR: {} should be type {}'.format(req, str(req_type)))
print('')
def display_licenses(self):
print('Licenses:')
print('=========')
requirements = [['id', int],
['url', str],
['name', str]]
for license in self.licenses:
for key, item in license.items():
print(' {}: {}'.format(key, item))
for req, req_type in requirements:
if req not in license:
print('ERROR: {} is missing'.format(req))
elif type(license[req]) != req_type:
print('ERROR: {} should be type {}'.format(req, str(req_type)))
print('')
print('')
def display_categories(self):
print('Categories:')
print('=========')
for sc_key, sc_val in self.super_categories.items():
print(' super_category: {}'.format(sc_key))
for cat_id in sc_val:
print(' id {}: {}'.format(cat_id, self.categories[cat_id]['name']))
print('')
def display_image(self, image_id, show_polys=True, show_bbox=True, show_crowds=True, use_url=False):
print('Image:')
print('======')
if image_id == 'random':
image_id = random.choice(list(self.images.keys()))
# Print the image info
image = self.images[image_id]
for key, val in image.items():
print(' {}: {}'.format(key, val))
# Open the image
if use_url:
image_path = image['coco_url']
response = requests.get(image_path)
image = PILImage.open(BytesIO(response.content))
else:
image_path = os.path.join(self.image_dir, image['file_name'])
image = PILImage.open(image_path)
# Calculate the size and adjusted display size
max_width = 600
image_width, image_height = image.size
adjusted_width = min(image_width, max_width)
adjusted_ratio = adjusted_width / image_width
adjusted_height = adjusted_ratio * image_height
# Create list of polygons to be drawn
polygons = {}
bbox_polygons = {}
rle_regions = {}
poly_colors = {}
print(' segmentations ({}):'.format(len(self.segmentations[image_id])))
for i, segm in enumerate(self.segmentations[image_id]):
polygons_list = []
if segm['iscrowd'] != 0:
# Gotta decode the RLE
px = 0
x, y = 0, 0
rle_list = []
for j, counts in enumerate(segm['segmentation']['counts']):
if j % 2 == 0:
# Empty pixels
px += counts
else:
# Need to draw on these pixels, since we are drawing in vector form,
# we need to draw horizontal lines on the image
x_start = trunc(trunc(px / image_height) * adjusted_ratio)
y_start = trunc(px % image_height * adjusted_ratio)
px += counts
x_end = trunc(trunc(px / image_height) * adjusted_ratio)
y_end = trunc(px % image_height * adjusted_ratio)
if x_end == x_start:
# This is only on one line
rle_list.append({'x': x_start, 'y': y_start, 'width': 1 , 'height': (y_end - y_start)})
if x_end > x_start:
# This spans more than one line
# Insert top line first
rle_list.append({'x': x_start, 'y': y_start, 'width': 1, 'height': (image_height - y_start)})
# Insert middle lines if needed
lines_spanned = x_end - x_start + 1 # total number of lines spanned
full_lines_to_insert = lines_spanned - 2
if full_lines_to_insert > 0:
full_lines_to_insert = trunc(full_lines_to_insert * adjusted_ratio)
rle_list.append({'x': (x_start + 1), 'y': 0, 'width': full_lines_to_insert, 'height': image_height})
# Insert bottom line
rle_list.append({'x': x_end, 'y': 0, 'width': 1, 'height': y_end})
if len(rle_list) > 0:
rle_regions[segm['id']] = rle_list
else:
# Add the polygon segmentation
for segmentation_points in segm['segmentation']:
segmentation_points = np.multiply(segmentation_points, adjusted_ratio).astype(int)
polygons_list.append(str(segmentation_points).lstrip('[').rstrip(']'))
polygons[segm['id']] = polygons_list
if i < len(self.colors):
poly_colors[segm['id']] = self.colors[i]
else:
poly_colors[segm['id']] = 'white'
bbox = segm['bbox']
bbox_points = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1],
bbox[0] + bbox[2], bbox[1] + bbox[3], bbox[0], bbox[1] + bbox[3],
bbox[0], bbox[1]]
bbox_points = np.multiply(bbox_points, adjusted_ratio).astype(int)
bbox_polygons[segm['id']] = str(bbox_points).lstrip('[').rstrip(']')
# Print details
print(' {}:{}:{}'.format(segm['id'], poly_colors[segm['id']], self.categories[segm['category_id']]))
# tif to png
image.convert(mode="I")
image.save("./tmp.png", "png")
image = PILImage.open("./tmp.png")
with open("tmp.png", 'rb') as f:
data = f.read()
# base64
b64=base64.b64encode(data)
img_b64 = "data:image/png;base64,"+ b64.decode('utf-8') # binary to string
html = '<div class="container" style="position:relative;">'
html += '<img src="{}" style="position:relative;top:0px;left:0px;width:{}px;">'.format(img_b64, adjusted_width)
html += '<div class="svgclass"><svg width="{}" height="{}">'.format(adjusted_width, adjusted_height)
if show_polys:
for seg_id, points_list in polygons.items():
fill_color = poly_colors[seg_id]
stroke_color = poly_colors[seg_id]
for points in points_list:
html += '<polygon points="{}" style="fill:{}; stroke:{}; stroke-width:1; fill-opacity:0.5" />'.format(points, fill_color, stroke_color)
if show_crowds:
for seg_id, rect_list in rle_regions.items():
fill_color = poly_colors[seg_id]
stroke_color = poly_colors[seg_id]
for rect_def in rect_list:
x, y = rect_def['x'], rect_def['y']
w, h = rect_def['width'], rect_def['height']
html += '<rect x="{}" y="{}" width="{}" height="{}" style="fill:{}; stroke:{}; stroke-width:1; fill-opacity:0.5; stroke-opacity:0.5" />'.format(x, y, w, h, fill_color, stroke_color)
if show_bbox:
for seg_id, points in bbox_polygons.items():
fill_color = poly_colors[seg_id]
stroke_color = poly_colors[seg_id]
html += '<polygon points="{}" style="fill:{}; stroke:{}; stroke-width:1; fill-opacity:0" />'.format(points, fill_color, stroke_color)
html += '</svg></div>'
html += '</div>'
html += '<style>'
html += '.svgclass { position:absolute; top:0px; left:0px;}'
html += '</style>'
return html
def process_info(self):
self.info = self.coco['info']
def process_licenses(self):
self.licenses = self.coco['licenses']
def process_categories(self):
self.categories = {}
self.super_categories = {}
for category in self.coco['categories']:
cat_id = category['id']
super_category = category['supercategory']
# Add category to the categories dict
if cat_id not in self.categories:
self.categories[cat_id] = category
else:
print("ERROR: Skipping duplicate category id: {}".format(category))
# Add category to super_categories dict
if super_category not in self.super_categories:
self.super_categories[super_category] = {cat_id} # Create a new set with the category id
else:
self.super_categories[super_category] |= {cat_id} # Add category id to the set
def process_images(self):
self.images = {}
for image in self.coco['images']:
image_id = image['id']
if image_id in self.images:
print("ERROR: Skipping duplicate image id: {}".format(image))
else:
self.images[image_id] = image
def process_segmentations(self):
self.segmentations = {}
for segmentation in self.coco['annotations']:
image_id = segmentation['image_id']
if image_id not in self.segmentations:
self.segmentations[image_id] = []
self.segmentations[image_id].append(segmentation)
ヘッダの表示
annotation_path = 'ANNO_PATH'
image_dir = 'IMG_PATH'
coco_dataset = CocoDataset(annotation_path, image_dir)
coco_dataset.display_info()
coco_dataset.display_licenses()
coco_dataset.display_categories()
画像IDを指定して表示
html = coco_dataset.display_image(1)
IPython.display.HTML(html)