More than 5 years have passed since last update.

交通安全くん：Pythonで交通標識の認識

Last updated at 2020-02-16Posted at 2020-02-11

この記事の実行環境はcolaboratory です。

今回の目標
１、今回は赤信号、青信号、止まれ、進入禁止の標識を認識する。
２、赤信号、止まれ、進入禁止の時は音声で注意する。

まず、YouTubeにアップした完成品をご覧ください。
交通安全くん

目次
１、事前準備
２、交通標識のDeepLearning
３、Videoを分析して赤信号、止まれ、進入禁止のあるFrameを検出
４、検出したFrameに音声を入れる

１、事前準備
① 交通標識のある場所で、スマホで動画を撮りましょう。
　画質は高いほうが認識しやすくなります。（私の場合は1920*1080、30FPS）
　動画を撮る際は交通安全にご注意を！
②「止まれ」、「赤信号」、「進入禁止」の注意音声を作成
　私はgTTSで作成しましたが、もちろん自分の声を録音してもOKです。

makeAudio_止まれ.ipynb

from gtts import gTTS #Google Text to Speech
from google import colab

# Mount Google Drive so the generated audio can be stored there
colab.drive.mount('/content/gdrive')

# Destination folder and the phrase to be spoken (a "stop" warning in Japanese)
path="gdrive/My Drive/make_video/"
word="止まれだよ"
out_file = path+'STOP_2.wav'

# Synthesize Japanese speech with Google Text-to-Speech and write it to Drive
tts = gTTS(word,lang='ja')
tts.save(out_file)

# Last expression of the cell: show a player for the saved file.
# NOTE(review): `Audio` is not imported anywhere in the visible source --
# presumably IPython.display.Audio; confirm it is in scope before running.
Audio(out_file)

③交通標識(赤信号、青信号、止まれ、進入禁止、青空)の画像を用意します。(２、交通標識のDeepLearningで使用します)
※青空の写真がないと、後の画像分析で青空が青信号として誤認識されてしまうため、
　事前に青空の写真も学習させます。

Googleの画像検索から画像をダウンロードします。

1.getImage_STOP.ipynb

# 止まれの画像を取る
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium

import urllib.request as req
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from selenium.webdriver.common.keys import Keys 

from google import colab
colab.drive.mount('/content/gdrive')

# Download counter (also used as the file name) and the thumbnail count seen
# on the previous scroll pass
count=1
tempImageCount=0

# Start Chrome in headless mode with the flags needed inside the Colab container
options = webdriver.ChromeOptions()
for chrome_flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(chrome_flag)
driver = webdriver.Chrome('chromedriver',options=options)
# Element lookups wait up to 10 seconds before giving up
driver.implicitly_wait(10)

# Google image search for stop signs
search_url = "https://www.google.com/search?rlz=1C1CAFC_enJP862JP870&biw=1366&bih=657&tbm=isch&sxsrf=ACYBGNSNeQ5IaB9V8b-6pc6q9gOtbrY4Uw%3A1577968008026&sa=1&ei=iOENXoiRAfCSr7wP-8ee0As&q=%E6%AD%A2%E3%81%BE%E3%82%8C%E6%A8%99%E8%AD%98&oq=%E6%AD%A2%E3%81%BE%E3%82%8C%E6%A8%99%E8%AD%98&gs_l=img.3..0l2j0i24l8.144019.154252..155304...3.0..0.429.5395.0j1j5j10j1......0....1..gws-wiz-img.......0i4i37j35i39j0i23j0i4i37i24.zXhLgCDtIBY&ved=0ahUKEwiI9db09OTmAhVwyYsBHfujB7oQ4dUDCAc&uact=5"
driver.get(search_url)
# Give the page a moment to render before the first element lookup
time.sleep(3)

from selenium.common.exceptions import WebDriverException

# Keep scrolling until a pass no longer reveals additional thumbnails.
while True:
    # Thumbnails currently present in the result page
    image_list = driver.find_elements_by_class_name('rg_ic')
    print(len(image_list))

    # Move the scroll bar to the bottom so that more results get loaded
    driver.find_element_by_tag_name('body').send_keys(Keys.END)

    # No growth since the previous pass: everything has been loaded
    if len(image_list) <= tempImageCount:
        break

    tempImageCount = len(image_list)
    print('------------------- go to next page --------------------------')
    try:
        # Click the "show more results" button when it is available
        driver.find_element_by_id('smb').click()
        print('------------------- click success --------------------------')
    except WebDriverException:
        # Button missing or not clickable yet: fall back to scrolling again.
        # Only Selenium failures are swallowed, so Ctrl-C still interrupts.
        driver.find_element_by_tag_name('body').send_keys(Keys.END)
        print('------------------- KEY END --------------------------')

import http.client
import os

# Folder on Drive that receives the downloaded thumbnails; create it up front
# so a missing folder is not misreported as a download failure for every image.
save_dir = 'gdrive/My Drive/image/交通安全くん/止まれ/'
os.makedirs(save_dir, exist_ok=True)

try:
    # Final list of thumbnails after scrolling has finished
    image_list = driver.find_elements_by_class_name('rg_ic')
    print(len(image_list))
    for image in image_list:
        # URL of the thumbnail; may be missing when the element has no src
        image_url = image.get_attribute('src')
        if not image_url:
            print('cant open url')
            continue

        # Download and store the image; a failure skips only this image
        try:
            with req.urlopen(image_url) as response:
                image_data = response.read()
            with open(save_dir+str(count)+'.jpg',mode='wb') as f:
                f.write(image_data)
        except (OSError, ValueError, http.client.HTTPException) as err:
            print('cant open url', err)
        else:
            print('download - {}'.format(count))
            count += 1
finally:
    # Always shut the browser down, even if the loop is interrupted
    driver.quit()

取得した画像から良いものを選びましょう。（なるべく多く、20枚以上が目安）
良い例：（背景がないもの）

NG例：

２、交通標識のDeepLearning

①画像ファイルをNumpy形式に変換

データセット「MNIST」のデータは以下のような(28,28)の配列です。

↑をマネして、まず画像ファイルを50＊50の正方形にリサイズしてからNumpy形式に変換します。
RGBのカラーモードなので、(50,50,3)の配列になります。

to_Dat.ipynb

import cv2
import os
from sklearn.model_selection import train_test_split
from PIL import Image
import os,glob
import numpy as np

from google import colab
colab.drive.mount('/content/gdrive')
!ls 'gdrive/My Drive'

# Categories to classify; a category's position in `groups` is its numeric label.
root_dir = 'gdrive/My Drive/image/交通安全くん２/'
train_dir = 'gdrive/My Drive/data/'
groups = ['青信号','一方通行','止まれ','進入禁止','赤信号','青空']
nb_classes = len(groups)
image_size = 50
# The photo set is small, so each training image is presented this many times.
repeat_count = 20

# Read every image once, folder by folder, as an RGB array of
# (image_size, image_size, 3).
X = []
Y = []
for idx, group in enumerate(groups):
    image_dir = root_dir + group
    for f in glob.glob(image_dir+'/*.jpg'):
        # The context manager closes the file handle as soon as it is converted
        with Image.open(f) as img:
            rgb = img.convert('RGB')                       # force RGB mode
            rgb = rgb.resize((image_size, image_size))     # resize to 50*50
            X.append(np.asarray(rgb))
        Y.append(idx)

if not X:
    raise FileNotFoundError('no *.jpg images found under ' + root_dir)
X = np.array(X)
Y = np.array(Y)

# Split BEFORE repeating: repeating first would put copies of the very same
# photo into both the training and the test set and inflate the test accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0)
X_train = np.concatenate([X_train] * repeat_count)
y_train = np.concatenate([y_train] * repeat_count)

# Store the four arrays in one object array. Filling the slots one by one
# avoids NumPy's ragged-array conversion, which newer releases reject.
xy = np.empty(4, dtype=object)
for slot, part in enumerate((X_train, X_test, y_train, y_test)):
    xy[slot] = part
np.save(train_dir + '交通安全くん２.npy', xy)
print(X_train.shape[1:])
print('ok', len(y_train) + len(y_test))

②ニューラルネットワークでDeeplearning
「畳み込み（Convolution）」という手法を使います。詳しくは以下のURLをご参照ください。
畳み込みニューラルネットワークとは？手順も丁寧に解説
↑のように画像データに2次元フィルターを掛けると、横線や縦線などの特徴を強調でき、正解率を大幅に上げられます。

deeplearning.ipynb

!pip install keras==2.2.4
import keras

from google import colab
colab.drive.mount('/content/gdrive')
!ls 'gdrive/My Drive'

import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils

# Load the train/test split written by the dataset-conversion notebook
x_train,x_test,y_train,y_test = np.load(
    'gdrive/My Drive/data/交通安全くん２.npy',
    mmap_mode=None, allow_pickle=True, fix_imports=True, encoding='ASCII')

# Convert the images to float32 and scale them by 1/255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the labels.
# NOTE(review): 10 output classes are used although the dataset notebook
# lists 6 categories; kept exactly as in the original.
num_classes = 10
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Neural network
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
import time

# Three convolution layers with pooling and dropout, then a dense classifier
model = Sequential([
    Conv2D(50, (3, 3), input_shape=(50, 50, 3), activation='relu'),  # convolution 1
    Conv2D(32, (3, 3), activation='relu'),                           # convolution 2
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),                           # convolution 3
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# The clock starts right before training; the reported time therefore
# covers both fit() and evaluate().
startTime = time.time()

history = model.fit(
    x_train, y_train,
    batch_size=3000,
    epochs=20,
    verbose=1,
    validation_data=(x_test, y_test),
)

score = model.evaluate(x_test, y_test, verbose=0)
# score[0] is the loss, score[1] the accuracy metric requested in compile()
print('loss:', score[0])
print('accuracy:', score[1])
# Elapsed wall-clock time
print("Computation time:{0:.3f} sec".format(time.time() - startTime))

# Persist the trained model for the video-analysis notebook
model.save('gdrive/My Drive/model/交通安全くん.h5')

成功率（accuracy）は約0.98で、いい感じです。
loss: 0.11440953898268777
accuracy: 0.9878378378378379
Computation time:46.734 sec

３、Videoを分析して赤信号、止まれ、進入禁止のあるFrameを検出

①実際の映像から標識の部分を切り出して、学習済みのモデルに識別させるにはどうすればよいでしょうか？

以下を参考にしました。
【Python/OpenCV】カラートラッキング（色追跡）で移動物体の検出

一番大きいブロブ取得.ipynb

# ブロブ解析
# Blob analysis
def analysis_blob(binary_img):
    """Return the centroid of the largest blob in a binary mask.

    Args:
        binary_img: binary image handed to cv2.connectedComponentsWithStats.

    Returns:
        The centroid row (x, y) of the component with the largest area, or
        None when the mask contains no component besides the first (index 0)
        entry, which is skipped.
    """
    # Labelling yields (count, label image, per-component stats, centroids)
    _, _, stats, centroids = cv2.connectedComponentsWithStats(binary_img)

    # Drop entry 0 so only the remaining blobs are compared
    blob_stats = stats[1:]
    blob_centers = centroids[1:]
    if len(blob_centers) == 0:
        return None

    # Pick the blob whose area column is the largest
    largest = np.argmax(blob_stats[:, cv2.CC_STAT_AREA])
    return blob_centers[largest]

予測メソッド.ipynb（引数は画像ファイルと座標）

def predictWithModel(image_file, x_from, y_from, x_to, y_to):
    """Classify one rectangular region of a frame with the trained model.

    Args:
        image_file: BGR image array the region is cut from.
        x_from, y_from, x_to, y_to: bounds of the region (end exclusive).

    Returns:
        (class_index, probability) when the most likely class scores above
        0.9 and is neither class 0 nor class 5 (green light and blue sky in
        the category list of the dataset notebook); otherwise (-1, -1).
        An empty region also yields (-1, -1).
    """
    image_size = 50

    # Cut out the region; an empty slice cannot be converted or resized
    img_crop = image_file[y_from:y_to, x_from:x_to]
    if img_crop.size == 0:
        return -1, -1

    # Same preprocessing as for training: RGB order, 50x50, scaled by 1/255
    img_crop = cv2.cvtColor(img_crop, cv2.COLOR_BGR2RGB)
    img_crop = cv2.resize(img_crop, (image_size, image_size))
    x_test = np.array([img_crop]).astype('float32')
    x_test /= 255

    # Prediction for the single image in the batch
    scores = model.predict(x_test)[0, :]
    best = int(np.argmax(scores))
    confidence = float(scores[best])

    if confidence > 0.9 and best not in (0, 5):
        return best, confidence
    return -1, -1

座標調整メソッド.ipynb（赤信号は長方形、止まれと進入禁止は正方形）

def adjustSize(height, width, center_x, center_y, div_size, div_size_w, kbn):
    """Compute a crop window around a centre point, clipped to the image.

    Args:
        height, width: size of the image the window must stay inside.
        center_x, center_y: centre of the detected blob.
        div_size: window height (and width for the square window).
        div_size_w: window width for the rectangular window.
        kbn: 1 selects the rectangular window, anything else the square one.

    Returns:
        (x_from, y_from, x_to, y_to)
    """
    if kbn == 1:
        # Rectangle: three quarters of the width lie to the left of the centre
        box_w = div_size_w
        left = center_x - div_size_w*3//4
    else:
        # Square: centred horizontally
        box_w = div_size
        left = center_x - div_size//2

    # Both shapes are centred vertically
    top = center_y - div_size//2

    # The far edges are derived from the unclipped near edges
    right = left + box_w
    bottom = top + div_size

    # Clip to [0, width] x [0, height]
    return max(left, 0), max(top, 0), min(width, right), min(height, bottom)

画像ファイル分析.ipynb

def predictImage3(argimg):
    """Look for a red sign or red light in one video frame and label it.

    The top-left quarter of the frame is masked with an HSV range for red,
    the largest remaining blob is located, and three windows around its
    centre are classified with predictWithModel. The most confident result
    wins.

    Args:
        argimg: BGR frame. When something is detected the marker circle is
            drawn directly on this array.

    Returns:
        (frame, class_index): the annotated frame (a new array) and the
        detected class index, or the unchanged input frame and -1 when
        nothing was recognised.
    """
    height, width = argimg.shape[:2]

    # Only the top-left quarter of the frame is analysed
    img = argimg[0:height//2, 0:width//2, ::]
    if img.size == 0:
        return argimg, -1
    crop_h, crop_w = img.shape[:2]

    # Convert to HSV and binarize: keep the pixels inside the range used for red
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    binary = cv2.inRange(hsv, (145, 70, 0), (180, 255, 255))

    # Morphological opening removes small noise from the mask
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    eroded = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    # Centre of the largest blob in the mask
    center = analysis_blob(eroded)
    if center is None:
        return argimg, -1
    center_x = int(center[0])
    center_y = int(center[1])

    # Windows tried around the centre as (div_size, div_size_w, kbn):
    # a rectangle, a square, then a smaller rectangle. The crop is taken
    # from `img`, so the window is clipped to the size of `img`.
    windows = ((60, 100, 1), (50, 100, 0), (40, 80, 1))
    indx = []
    pcnt = []
    for div_size, div_size_w, kbn in windows:
        x_from, y_from, x_to, y_to = adjustSize(
            crop_h, crop_w, center_x, center_y, div_size, div_size_w, kbn)
        found_indx, found_pcnt = predictWithModel(img, x_from, y_from, x_to, y_to)
        indx.append(found_indx)
        pcnt.append(found_pcnt)

    # Keep the window with the highest probability
    best = int(np.argmax(pcnt))
    max_index = indx[best]
    if max_index <= 0:
        return argimg, -1

    # Caption for the classes that trigger a warning; other classes get none
    captions = {2: "止まれ", 4: "赤信号", 3: "進入禁止"}
    text = captions.get(max_index, "")

    # Circle around the centre of the largest blob
    cv2.circle(argimg, (center_x, center_y), 80, (0, 200, 0),thickness=3, lineType=cv2.LINE_AA)

    # OpenCV cannot render this TrueType font, so the caption is drawn via PIL
    fontpath ='gdrive/My Drive/font/MSMINCHO.TTC'
    font = ImageFont.truetype(fontpath, 128)
    img_pil = Image.fromarray(argimg)
    draw = ImageDraw.Draw(img_pil)

    # The array is handed to PIL without changing the channel order, so the
    # fill tuple is interpreted in the frame's own (BGR) order.
    position = (center_x, center_y + 100)
    draw.text(position, text, font = font , fill = (0,0,255,0) )

    return np.array(img_pil), max_index

上記メソッドの準備ができたら、事前に用意したVideoを分析します。

Videoを分析.ipynb

import cv2
from google.colab.patches import cv2_imshow
from PIL import ImageFont, ImageDraw, Image

target_dir = 'gdrive/My Drive/target/交通安全くん２/'
# sorted() keeps the clip order, and therefore the frame numbering, reproducible
files = sorted(glob.glob(target_dir+'/src_*.mp4'))

target_avi_file = target_dir + "output.avi"
output_file = target_dir + "output.mp4"
# Writer for the annotated video: DIVX codec, 30 fps, 1920x1080
# (the recording settings mentioned in the preparation section)
fourcc = cv2.VideoWriter_fourcc(*"DIVX")
writer = cv2.VideoWriter(target_avi_file, fourcc, 30, (1920, 1080))

# Running frame number across all source clips
frame_cnt=0

# One row [frame number, class index] per clip: where its warning starts
fame_index_result=np.empty((0,2), int)

try:
    for f in files:
        cap = cv2.VideoCapture(f)
        # Rows [frame number, class index] for every detection in this clip
        temp=np.empty((0,2), int)

        try:
            while True:
                # Read one frame at a time
                ret, frame = cap.read()
                if not ret:
                    break  # read failure or end of the clip
                frame_cnt+=1

                frame,index = predictImage3(frame)

                if index > 0:
                    temp = np.append(temp, np.array([[frame_cnt,index]]), axis=0)
                writer.write(frame)  # write the (possibly annotated) frame
        finally:
            # Release every capture, not only the last one
            cap.release()

        # Most frequent warning class in this clip (2, 3 or 4)
        index_cnt = [np.count_nonzero(temp[:,1] == k) for k in (2, 3, 4)]
        if not any(index_cnt):
            continue  # no warning class was detected in this clip

        # Record the first frame showing that class (= when the audio starts)
        max_index = (2, 3, 4)[int(np.argmax(index_cnt))]
        first_hit = temp[temp[:,1] == max_index][0]
        fame_index_result = np.append(fame_index_result, np.array([first_hit]), axis=0)
finally:
    writer.release()

４、検出したFrameに音声を入れる

音声追加.ipynb

!pip -q install pydub
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
import cv2
import pyaudio
import sys
import time
import wave
import pydub
from pydub import AudioSegment
import moviepy.editor as mp
import datetime

temp_file = target_dir + "temp.mp4"

# Re-encode the annotated AVI as MP4 with a soundtrack.
# NOTE(review): `mp3_file` is not defined anywhere in the visible notebook
# cells; it must be set to an audio file beforehand -- TODO confirm which
# soundtrack was intended here.
clip_output = mp.VideoFileClip(target_avi_file).subclip()
clip_output.write_videofile(temp_file, audio=mp3_file)

# Measure the video: frame count and FPS give its length in seconds
cap = cv2.VideoCapture(target_avi_file)
try:
    video_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    video_fps = cap.get(cv2.CAP_PROP_FPS)
finally:
    cap.release()
if not video_fps:
    raise RuntimeError('could not read the FPS of ' + target_avi_file)
video_len_sec = video_frame / video_fps
print(video_len_sec)

# Write the result trimmed to the measured length
video = mp.VideoFileClip(temp_file).subclip(0,video_len_sec)
video.write_videofile(output_file)

mp3_stop_file_2="gdrive/My Drive/mp3/stop_2.wav"
mp3_stop_file_3="gdrive/My Drive/mp3/stop_3.wav"
mp3_stop_file_4="gdrive/My Drive/mp3/stop_4.wav"
# Warning audio per detected class index
warning_audio = {2: mp3_stop_file_2, 3: mp3_stop_file_3, 4: mp3_stop_file_4}

# Clips that make up the final video, in playback order
videos = []

# Position (seconds) up to which the timeline has already been assembled
startSec=0
temp_file2 = target_dir + "temp2.mp4"

# Insert the warning audio at every recorded detection
for idx in range(fame_index_result.shape[0]):
    mp3_stop_file = warning_audio[int(fame_index_result[idx][1])]

    base_sound = AudioSegment.from_file(mp3_stop_file)
    length_seconds = base_sound.duration_seconds  # length of the warning audio

    # The warning starts at the detection, but never before the part of the
    # timeline that is already assembled (avoids showing footage twice).
    video_len_sec_temp = max(fame_index_result[idx][0] / video_fps, startSec)
    # ...and it cannot run past the end of the video
    warning_end = min(video_len_sec_temp + length_seconds, video_len_sec)

    # Footage between the assembled part and the warning (skip if empty)
    if startSec < video_len_sec_temp:
        videos.append(mp.VideoFileClip(temp_file).subclip(startSec, video_len_sec_temp))

    # Footage of the same length as the audio, written with the audio added.
    # Each warning gets its own file: the clips are still read when the final
    # video is written, so reusing one file would overwrite earlier segments.
    if video_len_sec_temp < warning_end:
        segment_file = temp_file2.replace(".mp4", "_{}.mp4".format(idx))
        clip_output = mp.VideoFileClip(temp_file).subclip(video_len_sec_temp, warning_end)
        clip_output.write_videofile(segment_file, audio=mp3_stop_file)
        videos.append(mp.VideoFileClip(segment_file).subclip())

    # Remaining footage up to the next detection (or the end of the video)
    if idx == fame_index_result.shape[0] - 1:
        last_sec =  video_len_sec
    else:
        last_sec = fame_index_result[idx+1][0] / video_fps
    if warning_end < last_sec:
        videos.append(mp.VideoFileClip(temp_file).subclip(warning_end, last_sec))
    startSec = max(last_sec, warning_end)

# Join the edited clips. Without any detection there is nothing to join and
# the output file written by the previous step is kept as it is.
if videos:
    final_clip = mp.concatenate_videoclips(videos)
    final_clip.write_videofile(output_file)
else:
    print('no warning to insert; ' + output_file + ' is left unchanged')

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up