Introduction
The code in this article was tested in the following environment:
・OS: Windows 11 Pro (64-bit)
・Anaconda Navigator: 2.6.3
・Python: 3.10.16
・UI: Jupyter Notebook 7.3.2
・OpenCV: 4.11.0
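To confirm that your environment matches, you can print the versions directly. A minimal check; it only assumes OpenCV is installed in the active environment:

import sys
import cv2

# Print the interpreter and OpenCV versions to compare with the list above
print("Python:", sys.version)
print("OpenCV:", cv2.__version__)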
1. Reading and Displaying the Camera
import cv2

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open the camera")
    exit()

while True:
    # Read one frame
    ret, frame = cap.read()
    # Stop if the read failed
    if not ret:
        print("Could not grab a frame")
        break
    # Show the frame
    cv2.imshow("Camera", frame)
    # Press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()
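One caveat worth knowing before the exercises: cap.set() is only a request, and the driver may silently substitute the nearest mode it supports. A minimal sketch to check what resolution the camera actually delivers (the 640x480 request here is just an example):

import cv2

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
# cap.get() reports the size the driver actually chose
print("actual size:",
      int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
cap.release()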
2. Person Recognition with a Cascade Classifier
import cv2

window_name = "RESULT_IMG"
width, height = 1280, 720

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
if not cap.isOpened():
    print("Error opening the camera")
    exit()

cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

# Path to the bundled cascade file
cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml"
cascade = cv2.CascadeClassifier(cascade_path)
if cascade.empty():
    print("Cascade file read error")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # detectMultiScale(img, scaleFactor, minNeighbors, flags, minSize, maxSize)
    #   scaleFactor:  how much the image is shrunk at each scale; larger values
    #                 speed up the computation but may miss detections
    #   minNeighbors: small values cause false positives, large values cause misses
    #   flags:        not used by newer cascades
    #   minSize:      smallest object size to consider; anything smaller is ignored
    #   maxSize:      largest object size to consider
    faces = cascade.detectMultiScale(
        gray_img, scaleFactor=1.11, minNeighbors=3, flags=0,
        minSize=(100, 100), maxSize=(300, 300)
    )
    for (x, y, w, h) in faces:
        # cv2.rectangle(img, top-left corner, bottom-right corner, color, thickness, line type)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 255), 2, cv2.LINE_AA)
    cv2.imshow(window_name, frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
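haarcascade_frontalface_alt2.xml is only one of the pretrained cascades that ship with OpenCV; there are also ones for profile faces, eyes, full bodies, and more. A quick way to list what is available:

import os
import cv2

# Enumerate every pretrained Haar cascade bundled with this OpenCV install
for name in sorted(os.listdir(cv2.data.haarcascades)):
    if name.endswith(".xml"):
        print(name)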
3. Person Recognition (Advanced, Part 1)
・Add a recording feature
・Draw text onto the image
import cv2
import time
import numpy as np
from datetime import datetime

class VariousConf:
    def getTimeNumStr(self):
        # time.time(): UNIX time in seconds
        return str(int(time.time()))

    def getDateTime(self):
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

class VariousDisplay:
    def setTextMsg(self, img, text):
        overlay = img.copy()
        cv2.rectangle(overlay, (5, 5), (210, 35), (192, 192, 192), -1)
        alpha = 0.5
        # addWeighted: blend two images
        # cv2.addWeighted(img1, weight of img1, img2, weight of img2,
        #                 scalar added to the weighted sum (e.g. brightness), output image)
        cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img)
        cv2.putText(img, text, (10, 25), cv2.FONT_HERSHEY_PLAIN, 1, (32, 32, 32), 1, cv2.LINE_AA)

    def setRecSignal(self, img):
        overlay = img.copy()
        cv2.rectangle(overlay, (5, 5), (175, 35), (0, 0, 0), -1)
        cv2.rectangle(overlay, (8, 8), (172, 32), (192, 192, 192), -1)
        alpha = 0.5
        cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img)
        cv2.circle(img, (20, 19), 5, (64, 64, 255), -1)
        cv2.putText(img, "REC", (40, 24), cv2.FONT_HERSHEY_PLAIN, 1, (32, 32, 32), 1, cv2.LINE_AA)

def main():
    window_name = "RESULT_IMG"
    width, height = 1280, 720
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    if not cap.isOpened():
        print("Error opening the camera")
        return
    # Instantiate the helper classes
    vConf = VariousConf()
    vDis = VariousDisplay()
    # Create a gray image (64, 64, 64)
    blankImg = np.full((height, width, 3), 64, dtype=np.uint8)
    # Load the cascade
    cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml"
    cascade = cv2.CascadeClassifier(cascade_path)
    if not cascade.load(cascade_path):
        print("Cascade File Read Error")
        return
    # Folder where the video is saved (adjust this placeholder path for your machine)
    filename = f"/home/../{vConf.getTimeNumStr()}.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = 5
    # Prepare the recorder
    writer = cv2.VideoWriter(filename, fourcc, fps, (width, height), True)
    timeNum = int(vConf.getTimeNumStr())
    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        dstImg = frame.copy()
        grayImg = cv2.cvtColor(dstImg, cv2.COLOR_BGR2GRAY)
        faces = cascade.detectMultiScale(grayImg, scaleFactor=1.11, minNeighbors=3,
                                         minSize=(100, 100), maxSize=(300, 300))
        for (x, y, w, h) in faces:
            cv2.rectangle(dstImg, (x, y), (x + w, y + h), (0, 255, 255), 1, cv2.LINE_AA)
        progTime = int(vConf.getTimeNumStr())
        if timeNum + 5 > progTime:
            # Record for the first 5 seconds: show the REC sign on the preview
            # and stamp the recorded frame with the current date and time
            vDis.setRecSignal(dstImg)
            vDis.setTextMsg(frame, vConf.getDateTime())
            writer.write(frame)
            print("Write", timeNum + 5, vConf.getTimeNumStr())
        elif timeNum + 7 > progTime:
            # Then append about 2 seconds of blank (gray) frames
            writer.write(blankImg)
        cv2.imshow(window_name, dstImg)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    writer.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
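Once the save path points somewhere valid (see Exercise ③), the written file can be sanity-checked by reopening it. A minimal sketch; the "output.mp4" path is a placeholder for whatever your run actually produced:

import cv2

cap = cv2.VideoCapture("output.mp4")  # placeholder path
print("opened:", cap.isOpened())
print("frames:", int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
print("fps:   ", cap.get(cv2.CAP_PROP_FPS))
cap.release()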
4. Person Recognition (Advanced, Part 2)
・Add image processing (here, a median blur over the detected face regions)
import cv2
import numpy as np

# Global variables
srcImg = None
dstImg = None
rstImg = None
thresh1 = 1
thresh2 = 11
processing_enabled = True
windowResult = "ResultView"

# Load the cascade classifier
cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml"
cascade = cv2.CascadeClassifier(cascade_path)

def show(src, dst):
    global thresh1, thresh2
    gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
    dst[:] = src.copy()
    zeroMask = np.zeros(gray.shape, dtype=np.uint8)
    # Map the trackbar value (0-100) to a scaleFactor of 1.00-2.00; clamp the
    # low end because detectMultiScale requires scaleFactor > 1.0
    sfThresh = max((thresh2 + 100) / 100.0, 1.01)
    faces = cascade.detectMultiScale(gray, scaleFactor=sfThresh, minNeighbors=thresh1 + 1)
    # Paint the detected face regions white in the mask
    for (x, y, w, h) in faces:
        cv2.rectangle(zeroMask, (x, y), (x + w, y + h), 255, -1, cv2.LINE_AA)
    # Blur the whole frame, then copy only the masked regions into the output
    filterImg = cv2.medianBlur(src, 31)
    dst[:] = np.where(zeroMask[:, :, np.newaxis] == 255, filterImg, dst)
    for (x, y, w, h) in faces:
        cv2.rectangle(dst, (x, y), (x + w, y + h), (0, 255, 255), 2, cv2.LINE_AA)
    person_label = " persons" if len(faces) > 1 else " person"
    label = f"{len(faces)}{person_label} ScaleFactor:{sfThresh:.2f} MinNeighbors:{thresh1 + 1}"
    showFacesCount(dst, label)

def showFacesCount(img, text):
    overlay = img.copy()
    alpha = 0.5
    cv2.rectangle(overlay, (5, 5), (400, 30), (192, 192, 192), -1)
    cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img)
    cv2.putText(img, text, (30, 25), cv2.FONT_HERSHEY_PLAIN, 1, (32, 32, 32), 1, cv2.LINE_AA)

def update_thresh(param, value):
    global thresh1, thresh2
    if param == 'min':
        thresh1 = value
    elif param == 'scale':
        thresh2 = value

def main():
    global srcImg, dstImg, rstImg, processing_enabled
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    if not cap.isOpened():
        print("Error opening the camera")
        return
    cv2.namedWindow(windowResult)
    cv2.createTrackbar("MinNeighbors:", windowResult, thresh1, 19, lambda v: update_thresh('min', v))
    cv2.createTrackbar("ScaleFactor:", windowResult, thresh2, 100, lambda v: update_thresh('scale', v))
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        srcImg = frame.copy()
        dstImg = frame.copy()
        rstImg = frame.copy()
        # Process or pass through, depending on the flag
        if processing_enabled:
            show(dstImg, rstImg)
        else:
            rstImg = srcImg.copy()  # no processing
        # Show the processed (or untouched) frame
        cv2.imshow(windowResult, rstImg)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('f'):
            # Toggle processing on/off with the 'f' key
            processing_enabled = not processing_enabled
            print(f"Processing {'enabled' if processing_enabled else 'disabled'}")
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
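The median blur applied to the face region can be swapped for other effects. As one alternative, a pixelation (mosaic) works by shrinking and re-enlarging the region. The sketch below is not part of the original program, and the level parameter is an arbitrary choice:

import cv2

def mosaic_region(img, x, y, w, h, level=15):
    # Shrink the ROI, then scale it back up with nearest-neighbor
    # interpolation to produce blocky pixels
    roi = img[y:y + h, x:x + w]
    small = cv2.resize(roi, (max(1, w // level), max(1, h // level)),
                       interpolation=cv2.INTER_LINEAR)
    img[y:y + h, x:x + w] = cv2.resize(small, (w, h),
                                       interpolation=cv2.INTER_NEAREST)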
5. Image-Sensing Examples Using a Camera
5.1 Measuring Overall Length
import cv2
import numpy as np
import math

# Global variables
actual_diameter_cm = 2  # initial value: real diameter of the reference circle, in cm
mode = "select_rect"    # "select_rect" or "measure"

def on_trackbar(val):
    global actual_diameter_cm
    actual_diameter_cm = val if val > 0 else 1  # never allow 0

class Measure:
    def __init__(self):
        pass

    def get_base_size(self, img: np.ndarray, rect: tuple) -> float:
        # Find the reference circle inside the selected rectangle and
        # return its diameter in pixels
        x, y, w, h = rect
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_roi = gray[y:y + h, x:x + w]
        gray_roi = cv2.GaussianBlur(gray_roi, (11, 11), 2)
        _, gray_roi = cv2.threshold(gray_roi, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        circles = cv2.HoughCircles(
            gray_roi,
            cv2.HOUGH_GRADIENT,
            dp=2,
            minDist=10,
            param1=100,
            param2=50,
            minRadius=min(w, h) // 4,
            maxRadius=min(w, h)
        )
        radius = 0.0
        if circles is not None:
            circles = np.uint16(np.around(circles))
            for i in circles[0, :]:
                center = (int(i[0]) + x, int(i[1]) + y)
                radius = int(i[2])
                cv2.circle(img, center, radius, (0, 255, 255), -1, cv2.LINE_AA)
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        return radius * 2

    def get_length(self, distance: float, base_length: float) -> int:
        # Convert a pixel distance to cm using the reference circle's diameter
        if base_length == 0:
            return 0
        return int((distance / base_length) * actual_diameter_cm)

mouse_event = {
    "x1": 0, "y1": 0, "x2": 0, "y2": 0,
    "drawing": False,
    "rect_set": False,
    "dragging_line": False
}

def mouse_callback(event, x, y, flags, param):
    global mouse_event, start_point, mode
    if event == cv2.EVENT_LBUTTONDOWN:
        if mode == "select_rect":
            mouse_event["x1"], mouse_event["y1"] = x, y
            mouse_event["drawing"] = True
        elif mode == "measure":
            mouse_event["x1"], mouse_event["y1"] = x, y
            start_point = (x, y)
            mouse_event["dragging_line"] = True
    elif event == cv2.EVENT_MOUSEMOVE:
        if mouse_event["drawing"] or mouse_event["dragging_line"]:
            mouse_event["x2"], mouse_event["y2"] = x, y
    elif event == cv2.EVENT_LBUTTONUP:
        mouse_event["x2"], mouse_event["y2"] = x, y
        if mouse_event["drawing"]:
            mouse_event["drawing"] = False
            mouse_event["rect_set"] = True
        elif mouse_event["dragging_line"]:
            mouse_event["dragging_line"] = False

def main():
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    if not cap.isOpened():
        print("Error opening the camera")
        return
    window_name = "RESULT_IMG"
    cv2.namedWindow(window_name)
    cv2.setMouseCallback(window_name, mouse_callback)
    cv2.createTrackbar("diameter(cm)", window_name, actual_diameter_cm, 10, on_trackbar)
    measure = Measure()
    global start_point
    start_point = None
    end_point = None
    history_base_lengths = []
    global mode
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        img_draw = frame.copy()
        # Show the current mode
        cv2.putText(img_draw, f"Mode: {mode}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
        if mode == "select_rect" and mouse_event["rect_set"]:
            x1, y1 = mouse_event["x1"], mouse_event["y1"]
            x2, y2 = mouse_event["x2"], mouse_event["y2"]
            x, y = min(x1, x2), min(y1, y2)
            w, h = abs(x1 - x2), abs(y1 - y2)
            if w > 10 and h > 10:
                rect = (x, y, w, h)
                # Re-measure every frame and keep the history for averaging
                base_length = measure.get_base_size(img_draw, rect)
                if base_length > 0:
                    history_base_lengths.append(base_length)
        if mode == "measure" and mouse_event["dragging_line"]:
            end_point = (mouse_event["x2"], mouse_event["y2"])
            center_point = ((start_point[0] + end_point[0]) // 2, (start_point[1] + end_point[1]) // 2)
            distance_px = int(math.hypot(start_point[0] - end_point[0], start_point[1] - end_point[1]))
            display_distance = distance_px
            unit = "px"
            if history_base_lengths:
                avg_base_length = sum(history_base_lengths) / len(history_base_lengths)
                display_distance = measure.get_length(distance_px, avg_base_length)
                unit = "cm"
            label = f"{display_distance} {unit}"
            cv2.line(img_draw, start_point, end_point, (0, 255, 255), 1, cv2.LINE_AA)
            cv2.putText(img_draw, label, center_point, cv2.FONT_HERSHEY_PLAIN, 1.2, (0, 255, 255), 2, cv2.LINE_AA)
        if mode == "select_rect" and mouse_event["drawing"]:
            cv2.rectangle(img_draw, (mouse_event["x1"], mouse_event["y1"]), (mouse_event["x2"], mouse_event["y2"]), (255, 0, 0), 1)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('c'):
            # Clear the measurement state
            start_point = None
            end_point = None
            history_base_lengths.clear()
            mouse_event["rect_set"] = False
        elif key == ord('m'):
            # Toggle between rectangle selection and measuring
            mode = "measure" if mode == "select_rect" else "select_rect"
            print(f"Mode changed to: {mode}")
        cv2.imshow(window_name, img_draw)
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
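The conversion in get_length is plain proportionality: length_cm = (distance_px / base_px) * actual_diameter_cm. For example, if the 2 cm reference circle is detected as 100 px across, a line drawn over 250 px measures (250 / 100) * 2 = 5 cm.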
5.2 Measuring Reciprocating Motion
import cv2
import numpy as np

class UpDownCounter:
    def __init__(self, height):
        self.height = height
        self.init()

    def init(self):
        self.minCircle = 5
        self.maxCircle = 30
        self.minPoint = self.height
        self.maxPoint = 0
        self.tempPoint = 0
        self.upCount = 0

    def get_circle(self, img, mask):
        # Detect the tracked circle in the color mask and record its vertical position
        mask_blur = cv2.blur(mask, (11, 11))
        circles = cv2.HoughCircles(mask_blur, cv2.HOUGH_GRADIENT, dp=2,
                                   minDist=img.shape[0] // 4,
                                   param1=70, param2=20, minRadius=10, maxRadius=40)
        if circles is not None:
            circles = np.round(circles[0, :]).astype("int")
            for (x, y, r) in circles:
                cv2.circle(img, (x, y), r, (0, 0, 255), 2, cv2.LINE_AA)
                if len(circles) == 1 and y > 0:
                    self.minPoint = min(self.minPoint, y)
                    self.maxPoint = max(self.maxPoint, y)
                    self.tempPoint = y

    def set_msg_area(self, img):
        overlay = img.copy()
        cv2.rectangle(overlay, (5, 5), (410, 55), (64, 64, 64), -1)
        cv2.addWeighted(overlay, 0.5, img, 0.5, 0, img)

    def show_info(self, img):
        cv2.putText(img, f"Count = {self.upCount:3d}", (15, 23), cv2.FONT_HERSHEY_SIMPLEX,
                    0.4, (255, 255, 255), 1, cv2.LINE_AA)
        text = f"Position = {self.tempPoint:3d} Higher Position = {self.minPoint:3d} Lower Position = {self.maxPoint:3d}"
        cv2.putText(img, text, (15, 40), cv2.FONT_HERSHEY_SIMPLEX,
                    0.4, (255, 255, 255), 1, cv2.LINE_AA)

    def add_up_count(self):
        self.upCount += 1

    def set_min_circle(self, min_circle):
        self.minCircle = min_circle

    def get_min_point(self):
        return self.minPoint

    def get_max_point(self):
        return self.maxPoint

    def get_temp_point(self):
        return self.tempPoint

hMax, sMax, vMax = 0, 0, 0
hMin, sMin, vMin = 255, 255, 255
hsvImg = None

def mouse_callback(event, x, y, flags, userdata):
    # Drag with the left button to sample HSV values; right-click to reset
    global hMax, sMax, vMax, hMin, sMin, vMin, hsvImg
    if hsvImg is None:
        return
    if event == cv2.EVENT_MOUSEMOVE and flags == cv2.EVENT_FLAG_LBUTTON:
        H, S, V = hsvImg[y, x]
        hMax = max(hMax, H)
        sMax = max(sMax, S)
        vMax = max(vMax, V)
        hMin = min(hMin, H)
        sMin = min(sMin, S)
        vMin = min(vMin, V)
    elif event == cv2.EVENT_RBUTTONDOWN:
        hMax, sMax, vMax = 0, 0, 0
        hMin, sMin, vMin = 255, 255, 255
        userdata.init()

def main():
    global hsvImg
    width, height = 1280, 720
    foreImg = None
    maskImg = None
    backImg = np.full((height, width, 3), (0, 255, 255), dtype=np.uint8)
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    if not cap.isOpened():
        print("Error opening the camera")
        return
    udc = UpDownCounter(height)
    countFlg = False
    windowName = "COMPOSITE"
    cv2.namedWindow(windowName, cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.setMouseCallback(windowName, mouse_callback, udc)
    while True:
        lower = np.array([hMin, sMin, vMin])
        upper = np.array([hMax, sMax, vMax])
        ret, foreImg = cap.read()
        if not ret:
            break
        hsvImg = cv2.cvtColor(foreImg, cv2.COLOR_BGR2HSV)
        maskImg = cv2.inRange(hsvImg, lower, upper)
        if maskImg.shape[0] == height:
            # Tint the masked pixels yellow; passing dst explicitly keeps the
            # unmasked pixels of the frame intact
            foreImg = cv2.bitwise_or(foreImg, backImg, dst=foreImg, mask=maskImg)
        udc.set_msg_area(foreImg)
        udc.get_circle(foreImg, maskImg)
        # Only start counting once the observed travel exceeds 1/10 of the frame height
        minToMaxDis = udc.get_max_point() - udc.get_min_point()
        if minToMaxDis > foreImg.shape[0] // 10:
            borderPoint = udc.get_max_point() - minToMaxDis // 2
            if borderPoint < udc.get_temp_point() and not countFlg:
                udc.add_up_count()
                countFlg = True
            elif borderPoint > udc.get_temp_point():
                countFlg = False
        udc.show_info(foreImg)
        cv2.imshow(windowName, foreImg)
        # Press any key to quit
        if cv2.waitKey(1) >= 0:
            break
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
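When sampling HSV thresholds, note that OpenCV stores hue in the range 0-179 for 8-bit images (half the usual 0-359 degrees), while S and V run 0-255. A one-liner to see how a known BGR color is encoded:

import cv2
import numpy as np

# Convert a single pure-red BGR pixel to HSV
pixel = np.uint8([[[0, 0, 255]]])              # BGR red
print(cv2.cvtColor(pixel, cv2.COLOR_BGR2HSV))  # -> [[[  0 255 255]]]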
5.3 Intruder Detection
・Detect background differences with createBackgroundSubtractorMOG2
・When the proportion of changed pixels is large, start recording; after 5 seconds of footage, append 2 seconds of blank frames
・The sensitivity can be adjusted with a slider (createTrackbar)
import cv2
import numpy as np
from datetime import datetime
import time
import os

class VariousConf:
    def __init__(self):
        pass

    def getTimeNumStr(self):
        return str(int(time.time()))

    def getDateTime(self):
        now = datetime.now()
        return now.strftime("%Y-%m-%d %H:%M:%S")

class Backgroundsub:
    def __init__(self):
        self.bg_sub = cv2.createBackgroundSubtractorMOG2()

    def createDiff(self, img, mask_img):
        bg_img = np.full_like(img, (255, 0, 250))
        # apply() returns a mask that is white where the frame differs from the
        # background model and black where nothing changed
        mask = self.bg_sub.apply(img, learningRate=-1)
        mask = cv2.medianBlur(mask, 11)
        _, mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        output_img = cv2.bitwise_and(bg_img, img, mask=mask)
        # Where the mask is white, take output_img; elsewhere keep the original img
        img[:] = np.where(mask[:, :, np.newaxis] == 255, output_img, img)
        mask_img[:] = mask

    def doWatchChange(self, mask_img, threshold):
        # Return True when the white/black pixel ratio exceeds the threshold
        white = np.count_nonzero(mask_img)
        black = mask_img.size - white
        if black == 0:
            return False
        ratio = (white / black) * 100
        return 0 < ratio < 100 and ratio > threshold

    def setTextMsg(self, img, text):
        overlay = img.copy()
        alpha = 0.5
        cv2.rectangle(overlay, (5, 5), (205, 35), (192, 192, 192), -1)
        cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img)
        cv2.putText(img, text, (10, 25), cv2.FONT_HERSHEY_PLAIN, 1, (32, 32, 32), 1, cv2.LINE_AA)

def main():
    window_name = "RESULT_IMG"
    width, height = 1280, 720
    blank_img = np.full((height, width, 3), 64, dtype=np.uint8)
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    if not cap.isOpened():
        print("Error opening the camera")
        return
    vconf = VariousConf()
    os.makedirs("../../VIDEOS", exist_ok=True)
    file_name = f"../../VIDEOS/{vconf.getTimeNumStr()}.avi"
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    writer = cv2.VideoWriter(file_name, fourcc, 5, (width, height))
    bgsub = Backgroundsub()
    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
    cv2.moveWindow(window_name, 0, -28)
    th = 10  # initial threshold
    time_num = int(vconf.getTimeNumStr())

    def on_trackbar(val):
        nonlocal th
        th = val

    cv2.createTrackbar("Thresholds:", window_name, th, 100, on_trackbar)
    while True:
        ret, src_img = cap.read()
        if not ret:
            break
        dst_img = src_img.copy()
        mask_img = np.zeros((height, width), dtype=np.uint8)
        bgsub.createDiff(dst_img, mask_img)
        bgsub.setTextMsg(src_img, vconf.getDateTime())
        if bgsub.doWatchChange(mask_img, th):
            # Change detected: restart the 5-second recording window
            time_num = int(vconf.getTimeNumStr())
        prog_time = int(vconf.getTimeNumStr())
        if time_num + 5 > prog_time:
            writer.write(src_img)
            print("Write", time_num + 5, vconf.getTimeNumStr())
        elif time_num + 7 > prog_time:
            # After the 5-second clip, pad with about 2 seconds of blank frames
            writer.write(blank_img)
        cv2.imshow(window_name, dst_img)
        # Press any key to quit
        if cv2.waitKey(1) >= 0:
            break
    writer.release()
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
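To get a feel for the sensitivity slider: doWatchChange compares (white / black) * 100 against the slider value. With a 1280x720 frame (921,600 pixels) and the default threshold of 10, recording restarts once more than about 921,600 / 11 ≈ 83,800 pixels (roughly 9% of the frame) are flagged as changed.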
6. Exercises
Exercise ①
Run each of the programs above on your own PC.
Be sure to complete at least everything up to Section 4.
Exercise ② For "2. Person Recognition with a Cascade Classifier",
change the parameters
・scaleFactor
・minNeighbors
・minSize
・camera resolution (e.g. to 640x480)
and experiment to confirm whether the behavior changes.
Exercise ③ For "3. Person Recognition (Advanced, Part 1)",
set the video save location properly and check the saved video.
Exercise ④ In 5.3, the differences are shown in purple; try changing this color.
Assignment ① Stabilize detection (one object)
Starting with the condition that only one object is to be detected, improve (stabilize) the flickering
behavior where the object is detected in some frames and not in others.
【Hints】
・Treat the object as "detected" only after it has been found in a certain number of consecutive frames
・Treat the object as "gone" after a run of consecutive frames without a detection
Any of the programs above may be used as the starting point (other than 5.1-5.3).
Assignment ② Stabilize detection (multiple objects)
Extend Assignment ① so that it also works when multiple objects are detected.