More than 3 years have passed since last update.

物体検出モデル作成でデータ拡張した場合のxmlファイルの座標変換方法

Posted at 2020-12-02

実現したいこと

tensorflowのobject detectionなどで、物体検出モデルを作成する場合にxmlファイルが必要である。
データ拡張して画像枚数を増やしたい場合に、オリジナル画像のみをアノテーションし、データ拡張した画像のxmlファイルをオリジナル画像のxmlファイルから作成したい。

データ拡張の方法

画像の回転と、上下左右反転の組み合わせで拡張する。

データ拡張の組み合わせによって座標変換するコード例

以下の関数を、画像の回転と上下左右反転の組み合わせに応じて順に実行すると、データ拡張後の座標を求めることができる。なお、コード例は `import math` と `from decimal import Decimal, ROUND_HALF_UP` が実行済みであることを前提としている。

def transform_coordinate_by_origin(width, height, coordinate):
    """Convert a bounding box into corner points relative to the image center.

    Two steps are applied to every corner:

    1. The y value is subtracted from ``height`` (flips the vertical axis).
    2. The integer half-size of the image is subtracted so that the image
       center becomes the origin.

    Args:
        width (int): Image width.
        height (int): Image height.
        coordinate (list): Box as ``[xmin, ymin, xmax, ymax]``.

    Returns:
        list: Four ``(x, y)`` tuples in the order
        ``[left-upper, right-lower, left-lower, right-upper]``.
    """
    xmin, ymin, xmax, ymax = coordinate

    # Integer division: for odd sizes the center is floored.
    half_w, half_h = width // 2, height // 2

    left = xmin - half_w
    right = xmax - half_w
    # Flip the y axis first (height - y), then re-center.
    top = height - ymin - half_h
    bottom = height - ymax - half_h

    return [(left, top), (right, bottom), (left, bottom), (right, top)]


def get_rotate_coordinate(angle, transformed_coordinate):
    """Return the corner points after applying the selected rotation.

    Each point ``(x, y)`` is mapped to::

        x' = x * cos(theta) - y * sin(theta)
        y' = x * sin(theta) - y * cos(theta)

    NOTE: ``y'`` subtracts ``y * cos(theta)``, whereas the textbook
    counter-clockwise rotation adds it. For theta = 90 and 270 the cosine
    is (numerically almost) zero, so the result matches the textbook
    rotation. For theta = 180 the result is ``(-x, y)``.
    ``get_final_coordinate`` skips its y flip for ``angle == 1``, which
    appears to compensate for this, so the formula is intentionally kept.

    Args:
        angle (int): 0 -> rotate 90 degrees clockwise, 1 -> rotate 180
            degrees, 2 -> rotate 90 degrees counter-clockwise, 3 -> do
            nothing. Any other value is treated like 2, as before.
        transformed_coordinate (list): ``(x, y)`` tuples relative to the
            image center, ordered
            ``[left-upper, right-lower, left-lower, right-upper]``.

    Returns:
        list: The mapped ``(x, y)`` tuples, in the same order as the input.
        Values are floats and may carry tiny floating-point error (for
        example ``49.99999999999998``). For ``angle == 3`` a new list
        holding the original points is returned.
    """
    if angle == 3:
        # Documented as "do nothing": return the points untouched instead
        # of falling through to the 90-degree branch.
        return list(transformed_coordinate)

    if angle == 0:  # 90 degrees clockwise == 270 degrees counter-clockwise
        theta = 270
    elif angle == 1:  # 180 degrees
        theta = 180
    else:  # 90 degrees counter-clockwise
        theta = 90

    radians = math.radians(theta)
    sin_theta = math.sin(radians)
    cos_theta = math.cos(radians)

    return [
        (
            coord[0] * cos_theta - coord[1] * sin_theta,
            coord[0] * sin_theta - coord[1] * cos_theta,
        )
        for coord in transformed_coordinate
    ]


def get_inverted_coordinate(rotated_coordinate):
    """Return the points reflected through the origin.

    A point ``(x, y)`` becomes ``(-x, -y)``, which corresponds to flipping
    both vertically and horizontally about the origin.

    Args:
        rotated_coordinate (list): ``(x, y)`` tuples, ordered
            ``[left-upper, right-lower, left-lower, right-upper]``.

    Returns:
        list: The negated ``(x, y)`` tuples, in the same order as the input.
    """
    inverted_coordinate = []
    for point in rotated_coordinate:
        inverted_coordinate.append((-point[0], -point[1]))
    return inverted_coordinate



def get_final_coordinate(width, height, inverted_coordinate, angle, flip):
    """Convert center-origin points back to image coordinates.

    Steps applied to every point:

    1. Add the integer half-size of the image to x and y.
    2. Replace y by ``height - y``, unless the flip/angle combination
       says to keep it (see below).
    3. Round both values to the nearest integer, with ties rounded half up.

    The y value is kept as-is when ``flip == 1 and angle != 1`` or when
    ``flip == 0 and angle == 1``; presumably those combinations already
    leave the y axis in image orientation -- verify against the caller.

    Args:
        width (int): Image width.
        height (int): Image height.
        inverted_coordinate (list): ``(x, y)`` tuples relative to the image
            center, ordered
            ``[left-upper, right-lower, left-lower, right-upper]``.
        angle (int): 0 -> rotate 90 degrees clockwise, 1 -> rotate 180
            degrees, 2 -> rotate 90 degrees counter-clockwise, 3 -> do
            nothing.
        flip (int): 0 -> do nothing, 1 -> flip vertically and horizontally.

    Returns:
        list: ``(x, y)`` tuples of ints, in the same order as the input.
    """
    half_w = width // 2
    half_h = height // 2

    # Decide once whether the y axis is left untouched.
    if flip == 1:
        keep_y = angle != 1
    elif flip == 0:
        keep_y = angle == 1
    else:
        keep_y = False

    def _round_half_up(value):
        # Going through str() avoids binary float artefacts in Decimal.
        return int(Decimal(str(value)).quantize(Decimal('0'), rounding=ROUND_HALF_UP))

    final_coordinate = []
    for point in inverted_coordinate:
        x = point[0] + half_w
        y = point[1] + half_h
        if not keep_y:
            # Move the y origin back to the other edge of the image.
            y = height - y
        final_coordinate.append((_round_half_up(x), _round_half_up(y)))

    return final_coordinate

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up