Webカメラのキャプチャ
OpenCVによるキャプチャはよく見かけるけど、PyAVでもキャプチャできる。
今回は、PyAVでキャプチャしたら、解像度設定もちゃんとでき、
同じ解像度だとFPSがOpenCVよりよかったという話。
あと、imshowは遅いので、1920x1080 を3画面出すなら、OpenGL使ったほうが速い。
1画面なら30fpsは出るので、必要なfpsで使い分けしてもいいかも。
(OpenGLはなんだかんだ面倒だし)
OpenCVで解像度指定してもうまくいかない
今回、たまたまあったFullHD対応Webカメラ(Logi C615 HD WebCam)で、
OpenCVでキャプチャしたら、なぜか960x720までしか解像度が上がらず、
ネットを調べても、そんなことで困っているひとがおらず、
他の方法を調べてみた。ちなみにその時のコード。
import os
import cv2
from datetime import datetime
from collections import deque
import time
class Dict(dict):
    """dict subclass that allows attribute-style access to its keys.

    Missing names raise AttributeError (not KeyError) so that
    ``hasattr()``, ``copy.copy()`` and pickling keep working; the
    original ``__getattr__ = dict.__getitem__`` leaked KeyError.
    """
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None
# VideoCapture property ids to dump, mapped id -> name.
# getattr() is used instead of eval() to build the lookups safely.
param_id2name = dict(
    (getattr(cv2, f"CAP_PROP_{n}"), n) for n in """
        FRAME_WIDTH FRAME_HEIGHT FPS
        FOURCC FORMAT MODE CONVERT_RGB
        BRIGHTNESS CONTRAST SATURATION
        HUE GAIN EXPOSURE WB_TEMPERATURE
        GAMMA FOCUS PAN TILT ROLL IRIS
        AUTO_EXPOSURE AUTOFOCUS AUTO_WB
        TEMPERATURE ZOOM
        TRIGGER TRIGGER_DELAY SETTINGS
    """.split()
)

# Directory where captured still images are saved.
dirname = 'cap_c615a'

# Capture resolution.
# BUGFIX: width and height were swapped (height=1920, width=1080).
# The C615 tops out at 1920x1080 (WxH), so requesting height=1920 is
# an invalid mode and the driver falls back to a smaller one.
# cap_width = 960
# cap_height = 720
# cap_width = 3840
# cap_height = 2160
cap_width = 1920
cap_height = 1080

# Values applied to each VideoCapture.
# NOTE: a dict literal silently collapses duplicate keys, so listing
# FRAME_WIDTH/FRAME_HEIGHT several times (the "set it repeatedly"
# workaround) was a no-op.  If repetition is ever needed, repeat the
# cap.set() calls in a loop instead of duplicating dict entries.
cap_sets = {
    cv2.CAP_PROP_FOURCC: cv2.VideoWriter_fourcc(*"MJPG"),
    cv2.CAP_PROP_FPS: 30,
    cv2.CAP_PROP_FRAME_HEIGHT: cap_height,
    cv2.CAP_PROP_FRAME_WIDTH: cap_width,
    # cv2.CAP_PROP_SETTINGS: 1,
}

# USB cameras: capture three devices simultaneously via DirectShow.
cap_def = Dict(
    cap1=Dict(args=[3, cv2.CAP_DSHOW], sets=cap_sets, ),
    cap2=Dict(args=[1, cv2.CAP_DSHOW], sets=cap_sets, ),
    cap3=Dict(args=[2, cv2.CAP_DSHOW], sets=cap_sets, ),
)

# Timestamps of the last N_SAMPLE frames, kept in a queue for FPS estimation.
N_SAMPLE = 10
q = deque(time.time() for _ in range(N_SAMPLE))

# Opened VideoCapture objects keyed by name.
caps = {}
# One row per property id; each device's actual value is appended for dumping.
param_list = [[k] for k in param_id2name.keys()]
# Open every configured device, apply its settings, and collect the
# values the driver actually accepted for a final dump.
for cam_name, cam_cfg in cap_def.items():
    print(cam_name)
    cap = cv2.VideoCapture(*cam_cfg.args)
    caps[cam_name] = cap
    for prop_id, value in cam_cfg.sets.items():
        # print(f"setting: {param_id2name[prop_id]}={value}")
        if not cap.set(prop_id, value):
            print(f"setting failed: {param_id2name[prop_id]}={value}")
    # Record the effective value of every tracked property for this device.
    # print(cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    for row in param_list:
        row.append(cap.get(row[0]))
# Dump the effective settings (one line per property, one column per device).
for row in param_list:
    print(param_id2name[row[0]], row[1:])
# Main loop: read one frame per camera, overlay FPS, display, and handle keys.
frame = {}
while True:
    for name, cap in caps.items():
        _, frame[name] = cap.read()
    # FPS averaged over the most recent N_SAMPLE frames.
    now = time.time()
    fps = N_SAMPLE / (now - q.popleft())
    q.append(now)
    for name, img in frame.items():
        if img is not None:
            # Overlay the measured FPS in the top-left corner.
            cv2.putText(img, '{:6.3f}fps'.format(fps), (10, 20),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0))
            cv2.imshow(name, img)
    key = cv2.waitKey(10) & 0xFF
    if key == ord('c'):
        # 'c' saves the current frame of every camera as a timestamped PNG.
        dt = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        print("capture", dt)
        os.makedirs(dirname, exist_ok=True)
        for name, img in frame.items():
            if img is not None:
                cv2.imwrite(f'{dirname}/cap_{dt}_{name}.png', img)
    if key == 27:  # ESC quits
        break
for cap in caps.values():
    cap.release()
cv2.destroyAllWindows()
PyAVによるキャプチャ
PyAVはffmpeg
のラッパーなので、これでキャプチャできるか確認。
参考: DirectShow - FFmpeg
デバイスの一覧のやり方
!ffmpeg -list_devices true -f dshow -i dummy
・・・中略・・・
[dshow @ 000001a49f18e580] "Logi C615 HD WebCam"
[dshow @ 000001a49f18e580] Alternative name "@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#6&15d3e9a7&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global"
[dshow @ 000001a49f18e580] "Logi C615 HD WebCam"
[dshow @ 000001a49f18e580] Alternative name "@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#6&c7e554f&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global"
[dshow @ 000001a49f18e580] "Logi C615 HD WebCam"
[dshow @ 000001a49f18e580] Alternative name "@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#9&8216d50&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global"
設定可能なデバイス用の設定項目
!ffmpeg -list_options true -f dshow -i video="Logi C615 HD WebCam"
・・・中略・・・
[dshow @ 000001f8a70ee480] DirectShow video device options (from video devices)
[dshow @ 000001f8a70ee480] Pin "キャプチャ" (alternative pin name "0")
[dshow @ 000001f8a70ee480] pixel_format=yuyv422 min s=640x480 fps=5 max s=640x480 fps=30
・・・中略・・・
[dshow @ 000001f8a70ee480] pixel_format=yuyv422 min s=1600x896 fps=5 max s=1600x896 fps=7.5
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=864x480 fps=5 max s=864x480 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=864x480 fps=5 max s=864x480 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1024x576 fps=5 max s=1024x576 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1024x576 fps=5 max s=1024x576 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=800x600 fps=5 max s=800x600 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=800x600 fps=5 max s=800x600 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=960x720 fps=5 max s=960x720 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=960x720 fps=5 max s=960x720 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1280x720 fps=5 max s=1280x720 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1280x720 fps=5 max s=1280x720 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1600x896 fps=5 max s=1600x896 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1600x896 fps=5 max s=1600x896 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1920x1080 fps=5 max s=1920x1080 fps=30
[dshow @ 000001f8a70ee480] vcodec=mjpeg min s=1920x1080 fps=5 max s=1920x1080 fps=30
video=Logi C615 HD WebCam: Immediate exit requested
ffmpeg でキャプチャしてみる
ffmpeg -f dshow -video_device_number 1 -video_size 1600x896 -vcodec mjpeg -i video="Logi C615 HD WebCam" test.avi
[mjpeg @ 000002015b1e9540] unable to decode APP fields: Invalid data found when processing input
Input #0, dshow, from 'video=Logi C615 HD WebCam':
Duration: N/A, start: 213165.622000, bitrate: N/A
Stream #0:0: Video: mjpeg (Baseline) (MJPG / 0x47504A4D), yuvj422p(pc, bt470bg/unknown/unknown), 1600x896, 30 fps, 30 tbr, 10000k tbn, 10000k tbc
Stream mapping:
Stream #0:0 -> #0:0 (mjpeg (native) -> mpeg4 (native))
Press [q] to stop, [?] for help
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
[swscaler @ 000002015b254a00] deprecated pixel format used, make sure you did set range correctly
Output #0, avi, to 'test.avi':
Metadata:
ISFT : Lavf58.45.100
Stream #0:0: Video: mpeg4 (FMP4 / 0x34504D46), yuv420p, 1600x896, q=2-31, 200 kb/s, 30 fps, 30 tbn, 30 tbc
Metadata:
encoder : Lavc58.91.100 mpeg4
Side data:
cpb: bitrate max/min/avg: 0/0/200000 buffer size: 0 vbv_delay: N/A
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 15 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 13 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 14 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 15 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 14 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 14 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 15 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
Last message repeated 14 times
[mjpeg @ 000002015b1f10c0] unable to decode APP fields: Invalid data found when processing input
frame= 124 fps= 30 q=31.0 Lsize= 1573kB time=00:00:04.13 bitrate=3116.8kbits/s speed=1.01x
video:1564kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.545260%
ちなみに、C615だとめっちゃ警告出るけど、どうしようもないみたい。
Getting "unable to decode APP fields" while playing USB webcam stream through ffplay
PyAVでキャプチャしてみる
引数は、大体FFmpegと同じ。遅延が大きかったのでrtbufsizeを最低に設定。
さすがに3台同時だと10~15fps、2台だと15~20fps、1台なら30fps
import logging
import av
import cv2
from collections import deque
import time
logging.basicConfig()
# Silence libav's warning spam (the C615 triggers constant
# "unable to decode APP fields" messages).
logging.getLogger('libav').setLevel(logging.FATAL)

# Timestamps of the last N_SAMPLE frames, used to estimate FPS.
N_SAMPLE = 10
q = deque(time.time() for _ in range(N_SAMPLE))

# FFmpeg/dshow input options shared by every device.
con_options = dict(
    video_size='1920x1080',
    # video_size='960x720',
    # video_size='1280x720',
    vcodec='mjpeg',
    framerate='30',
    rtbufsize='1',  # minimal realtime buffer to cut latency
    # show_video_device_dialog='true',
)
con_def = [
    dict(format='dshow', file=r'video=Logi C615 HD WebCam', options=dict(con_options, video_device_number='1') ),
    dict(format='dshow', file=r'video=Logi C615 HD WebCam', options=dict(con_options, video_device_number='0') ),
    dict(format='dshow', file=r'video=Logi C615 HD WebCam', options=dict(con_options, video_device_number='2') ),
    # dict(format='dshow', file=r'video=@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#9&8216d50&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global', options=con_options ),
    # dict(format='dshow', file=r'video=@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#6&15d3e9a7&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global', options=con_options ),
    # dict(format='dshow', file=r'video=@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#6&c7e554f&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global', options=con_options ),
]
containers = [av.open(**args) for args in con_def]
try:
    # Pull one decoded frame from every device per iteration.
    for frames in zip(*[c.decode(video=0) for c in containers]):
        images = [f.to_ndarray(format='bgr24') for f in frames]
        # FPS over the most recent N_SAMPLE frames.
        now = time.time()
        fps = N_SAMPLE / (now - q.popleft())
        q.append(now)
        for idx, img in enumerate(images):
            cv2.putText(img, '{:6.3f}fps'.format(fps), (10, 20),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0))
            cv2.imshow(f'VIDEO {idx}', img)
        if cv2.waitKey(10) == 27:  # ESC quits
            break
except KeyboardInterrupt:
    print("KeyboardInterrupt")
finally:
    for c in containers:
        c.close()
    cv2.destroyAllWindows()
PyOpenGL による描画
どうも3画面同時で10~15fpsになるのは、cv2.imshow が遅いからみたい。
表示なしで更新頻度見ると30fpsぐらい出そう。
PythonでOpenCVの画像をOpenGLで表示する
を参考に、OpenGLで表示。
下記コードで試すと、3画面同時で20~25fpsぐらい、もう少し速くならないかな。
from OpenGL.GL import *
from OpenGL.GLU import *
from OpenGL.GLUT import *
import threading
import cv2
import numpy as np
class OpenGLWindow:
    """GLUT-backed window that continuously displays OpenCV (BGR) images.

    Each instance creates one GLUT window and, when given a
    ``read_image_iterator``, starts a daemon thread that keeps pulling
    frames from it. A process-wide Manager singleton drives redisplay
    of every window from a single GLUT idle callback.
    """

    @classmethod
    def get_manager(cls):
        # Access the process-wide window-manager singleton.
        return cls.Manager.get_instance()

    @classmethod
    def mainloop(cls):
        # Enter the GLUT main loop for all created windows (blocks).
        cls.get_manager().mainloop()

    class Manager:
        """Singleton tracking window ids and triggering their redraws."""

        @classmethod
        def get_instance(cls):
            # Lazily create the single shared instance on first use.
            if not hasattr(cls, "_instance"):
                cls._instance = cls()
            return cls._instance

        def __init__(self):
            # GLUT window ids registered for idle-driven redisplay.
            self.__winids = []

        def append(self, winid):
            # Register a window id created by OpenGLWindow.__init__.
            self.__winids.append(winid)

        def __idle(self):
            # Idle callback: request a redraw of every registered window.
            for winid in self.__winids:
                glutSetWindow(winid)
                glutPostRedisplay()

        def mainloop(self):
            glutIdleFunc(self.__idle)
            # Make glutMainLoop() return (instead of exiting the process)
            # when the last window is closed.
            glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_GLUTMAINLOOP_RETURNS);
            glutMainLoop()

    def update_image(self, image):
        # Swap in the latest frame; draw() picks it up on the next redisplay.
        # NOTE(review): no lock -- presumably relies on the GIL making this
        # reference assignment atomic; confirm if frames ever tear.
        self.image = image

    def run(self):
        """Reader-thread body: pull frames until closed or exhausted."""
        if self.read_image_iterator is None:
            return
        for image in self.read_image_iterator:
            self.update_image(image)
            if self.closing:
                break
        self.read_image_iterator = None

    def draw(self):
        """GLUT display callback: draw self.image as a full-window texture."""
        # Paste into texture to draw at high speed
        img = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB) #BGR-->RGB
        h, w = img.shape[:2]
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, w, h, 0, GL_RGB, GL_UNSIGNED_BYTE, img)
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        glColor3f(1.0, 1.0, 1.0)
        # Enable texture map
        glEnable(GL_TEXTURE_2D)
        # Set texture map method
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)
        # Draw a textured quad; the V texture coordinates are flipped so
        # the image's top row maps to the top of the window.
        glBegin(GL_QUADS)
        # bottom-left
        glTexCoord2d(0.0, 1.0)
        glVertex3d(-w/2, -h/2, 0.0)
        # bottom-right
        glTexCoord2d(1.0, 1.0)
        glVertex3d( w/2, -h/2, 0.0)
        # top-right
        glTexCoord2d(1.0, 0.0)
        glVertex3d( w/2, h/2, 0.0)
        # top-left
        glTexCoord2d(0.0, 0.0)
        glVertex3d(-w/2, h/2, 0.0)
        glEnd()
        glFlush();
        glutSwapBuffers()

    def reshape(self, w, h):
        """GLUT reshape callback: keep projection 1:1 with the window size."""
        self.windowWidth = w
        self.windowHeight = h
        glViewport(0, 0, w, h)
        glLoadIdentity()
        # Make the display area proportional to the size of the view
        glOrtho(-w/2, w/2, -h/2, h/2, -1.0, 1.0)

    def closeWindow(self):
        # Leave the GLUT main loop (all windows).
        print('closeWindow')
        glutLeaveMainLoop()

    def close(self):
        # GLUT close callback: ask the reader thread (run()) to stop.
        self.closing = True

    def keyboard(self, key, x, y):
        # convert byte to str
        key = key.decode('utf-8')
        # press q to exit
        if key == 'q':
            self.closeWindow()

    def init(self):
        # Light-gray clear color.
        glClearColor(0.7, 0.7, 0.7, 0.7)

    def __init__(
        self, argv=(),
        x=0, y=0, width=720, height=480,
        title="Display", read_image_iterator=None, auto_start=True,
    ):
        """Create a GLUT window.

        argv: forwarded to glutInit.
        x, y: initial window position; width, height: initial size.
        title: window title.
        read_image_iterator: iterable yielding BGR images to display.
        auto_start: start the reader thread immediately when an
            iterator is given.
        """
        self.title = title
        # White placeholder image shown until the first frame arrives.
        self.image = np.full((height, width, 3), 255, np.uint8)
        self.windowWidth = width
        self.windowHeight = height
        self.closing = False
        self.read_image_iterator = read_image_iterator
        # Daemon thread so the process can exit while frames keep coming.
        self.thread = threading.Thread(target=self.run, daemon=True)
        glutInitWindowPosition(x, y);
        glutInitWindowSize(width, height);
        glutInit(argv)
        glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE )
        winid = glutCreateWindow(title)
        glutDisplayFunc(self.draw)
        glutReshapeFunc(self.reshape)
        glutKeyboardFunc(self.keyboard)
        glutCloseFunc(self.close)
        self.get_manager().append(winid)
        self.init()
        if auto_start and read_image_iterator is not None:
            self.thread.start()
import logging
import av
import cv2
from collections import deque
import time
logging.basicConfig()
# Silence libav warnings (the C615 floods "unable to decode APP fields").
logging.getLogger('libav').setLevel(logging.FATAL)
# Number of frame timestamps used when estimating FPS.
N_SAMPLE = 10
# FFmpeg/dshow input options shared by every device.
con_options = dict(
    video_size='1920x1080',
    # video_size='960x720',
    # video_size='1280x720',
    # video_size='1600x896',
    vcodec='mjpeg',
    framerate='30',
    rtbufsize='1',  # minimal realtime buffer to cut latency
    # show_video_device_dialog='true',
)
# One entry per capture device (same camera model, distinguished by
# video_device_number; alternative PnP-path addressing kept for reference).
con_def = [
    dict(format='dshow', file=r'video=Logi C615 HD WebCam', options=dict(con_options, video_device_number='1') ),
    dict(format='dshow', file=r'video=Logi C615 HD WebCam', options=dict(con_options, video_device_number='0') ),
    dict(format='dshow', file=r'video=Logi C615 HD WebCam', options=dict(con_options, video_device_number='2') ),
    #dict(format='dshow', file=r'video=@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#9&8216d50&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global', options=con_options ),
    #dict(format='dshow', file=r'video=@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#6&15d3e9a7&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global', options=con_options ),
    #dict(format='dshow', file=r'video=@device_pnp_\\?\usb#vid_046d&pid_082c&mi_02#6&c7e554f&0&0002#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global', options=con_options ),
]
def read_frame(args):
    """Generator: open the capture device described by *args* (keyword
    arguments for av.open) and yield each decoded video frame as a
    BGR24 ndarray. The container is closed when the generator is
    exhausted or garbage-collected."""
    src = av.open(**args)
    try:
        for raw in src.decode(video=0):
            yield raw.to_ndarray(format='bgr24')
    finally:
        print("close")
        src.close()
def draw_fps(reader):
    """Generator wrapping a frame iterator: stamp a running FPS figure
    onto each frame before yielding it.

    FPS is averaged over the last N_SAMPLE frames using a queue of
    arrival timestamps.
    """
    stamps = deque(time.time() for _ in range(N_SAMPLE))
    for img in reader:
        # FPS over the most recent N_SAMPLE frames.
        t = time.time()
        rate = N_SAMPLE / (t - stamps.popleft())
        stamps.append(t)
        cv2.putText(img, '{:6.3f}fps'.format(rate), (10, 20),
                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0))
        yield img
# Create one OpenGL window per device, each fed by its own
# read_frame -> draw_fps pipeline, then hand control to GLUT.
window_width = 1920
window_height = 1080
windows = []
for idx, dev_args in enumerate(con_def):
    pipeline = draw_fps(read_frame(dev_args))
    win = OpenGLWindow(
        title=f"Cap{idx}",
        read_image_iterator=pipeline,
        x=idx * window_width,
        width=window_width,
        height=window_height,
    )
    windows.append(win)
OpenGLWindow.mainloop()