More than 5 years have passed since last update.

SkyWayを使ってリアルタイム物体検出つきビデオチャットを作る

Last updated at 2020-06-15 / Posted at 2020-06-11

概要

ビデオチャットのSkyWayに物体検出をいれて、リアルタイムで物体検出しながら
ビデオチャットをする謎のビデオチャットです。

できたもの

#ProtoOut pic.twitter.com/bjZZPddXEY
— 3yaka (@3yaka4) June 11, 2020

概要

SkyWayで作ったビデオチャットに機械学習のTensorFlow.jsを優しーく包んでくれたml5.jsのYOLOを使って物体検出をさせ、PoseNetを使ってプライバシーを配慮した目線をかくすものをつけました。

人物に四角がついてその上にPersonと出て、左目から右目にかけて線が入ります。

1. SkyWayを使ってWebRTC

Javascript SDK | API Reference | SkyWay(アプリやWebサービスに、ビデオ・音声通話をかんたんに導入・実装できるSDK)
こちらを使っていきます

2. PoseNetを使って姿勢推定

Webブラウザでリアルタイムに人間の姿勢推定を可能にする機械学習モデル「PoseNet」
PoseNetとはリアルタイムに人間の姿勢推定を可能にする機械学習モデルです。
これを使ってプライバシー保護っぽく左目から右目にかけてlineを引きます。

3. YOLOを使って物体検出

YOLO（You Only Look Once）
YOLOとは、ウェブカメラやスマホカメラから取得した映像に写っている物体を検出してくれるアルゴリズムです。

Netlify freenomで公開して他の人とチャットする

SkyWayを他の人と使うにはHTTPSが必須です！
今回はindex.htmlとmain.jsと画像だけの静的ファイルなのでgitもつかわずNetlifyに直接アップロードして公開します。

コード

<!DOCTYPE html>
<html lang="ja">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta http-equiv="X-UA-Compatible" content="ie=edge" />

    <title>myVideoChat</title>
    <!-- Page background plus button and form styling. -->
    <style>
      html {
        background: url(haikei.jpg) center center no-repeat;
        background-size: cover;
        height: 100vh;
      }

      body {
        width: 700px;
        margin: auto;
      }

      input[type=text],
      textarea {
        border-radius: 4px;
        border: 1px solid #999;
        background-color: #fff;
        padding: 10px;
      }

      input[type=text] {
        /* Was "14m", which is not a valid CSS length and was ignored. */
        width: 14em;
      }

      input[type=text].input_err {
        border: 1px solid #c44;
        background-color: #fdd;
      }

      button {
        border: none;
        width: 8em;
        padding: 10px;
        border-radius: 5px;
        background-color: #00BCD4;
        color: #fff;
        font-weight: bold;
        -webkit-appearance: none;
        -moz-appearance: none;
        appearance: none;
        cursor: pointer;
        margin: 0 auto;
      }

      p {
        color: #fff;
      }
    </style>
  </head>

  <body>
    <div id="hako">
      <!-- Placeholder text shown until the script reports that the YOLO model has loaded. -->
      <p id="status">モデルを読み込むまでまってね...</p>
    </div>

    <script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/1.0.0/p5.min.js"></script>
    <script src="https://unpkg.com/ml5@0.4.3/dist/ml5.min.js"></script>
    <script src="https://cdn.webrtc.ecl.ntt.com/skyway-latest.js"></script>
    <script src="main.js"></script>
  </body>
</html>

// Video sources: the local camera capture and the remote peer's video.
let capture;
let theirVideo;

// Most recent PoseNet results for the local and the remote video.
let poses = [];
let theirPoses = [];

// YOLO detectors (local, remote) and their most recent detections.
let yolo;
let yolo_thier;
let objects = [];
let theirObjects = [];

// Handle to the #status element that reports model loading.
let status;

/**
 * p5.js start-up hook.
 *
 * Creates the canvas, opens the local camera, attaches YOLO and PoseNet to
 * it, and wires up the SkyWay peer: an id input plus "Call" button for
 * outgoing calls and an answer handler for incoming ones. Every remote
 * stream gets its own hidden video element, PoseNet listener and YOLO
 * detector.
 */
function setup() {
  // One canvas, 640x240.
  createCanvas(640, 240);

  // Local camera, video only.
  capture = createCapture({
    video: {
      width: 640,
      height: 480
    },
    audio: false
  });
  // YOLO detector for the local camera; startDetecting is its ready callback.
  yolo = ml5.YOLO(capture, startDetecting);
  status = select('#status');
  // Frames are painted onto the canvas, so hide the raw capture element.
  capture.hide();
  ml5.poseNet(capture).on("pose", (results) => {
    poses = results;
  });

  // SkyWay peer instance.
  const peer = new Peer({
    key: "***",
  });
  // Per the original author's note, "open" only fires when this domain is
  // allowed in the SkyWay settings.
  peer.on("open", () => {
    console.log("open! id=" + peer.id);
    createP("Your id: " + peer.id);
  });
  // Surface signalling/connection failures instead of failing silently.
  peer.on("error", (err) => {
    console.error("peer error", err);
  });

  // Text box for the id of the peer to call.
  const idInput = createInput("");

  // "Call" button: dial the id typed into the text box.
  createButton("Call").mousePressed(() => {
    const callId = idInput.value().trim();
    if (!callId) {
      // Nothing to dial; calling with a blank id can never connect.
      console.warn("call skipped: no peer id entered");
      return;
    }
    console.log("call! id=" + peer.id);
    const call = peer.call(callId, capture.elt.srcObject);
    addVideo(call);
  });

  // Incoming call: answer with the local stream and show the caller.
  peer.on("call", (call) => {
    console.log("be called!");
    call.answer(capture.elt.srcObject);
    addVideo(call);
  });

  // Hook the remote stream of `call` up to a hidden video plus detectors.
  function addVideo(call) {
    if (!call) {
      // peer.call() yields no connection when the peer is not open yet.
      console.warn("call failed: no media connection was created");
      return;
    }
    call.on("stream", (theirStream) => {
      console.log("stream!");
      // Video element backing the remote pane; drawn via the canvas only.
      theirVideo = createVideo();
      theirVideo.elt.autoplay = true;
      theirVideo.elt.srcObject = theirStream;
      theirVideo.hide();

      // Pose estimation on the remote video.
      ml5.poseNet(theirVideo).on("pose", (results) => {
        theirPoses = results;
      });
      // Start only the remote loop once this model is ready; reusing
      // startDetecting here would kick off a second local detection loop.
      yolo_thier = ml5.YOLO(theirVideo, theirDetect);
    });
  }
}

// Size of one video pane; the canvas shows the local and remote panes side by side.
const PANE_WIDTH = 320;
const PANE_HEIGHT = 240;

/**
 * p5.js per-frame hook.
 *
 * Paints the local video into the left pane and the remote video into the
 * right pane, then overlays each pane's object-detection boxes and a bar
 * across the eyes of the first detected pose.
 */
function draw() {
  if (capture) image(capture, 0, 0, PANE_WIDTH, PANE_HEIGHT);
  if (theirVideo) image(theirVideo, PANE_WIDTH, 0, PANE_WIDTH, PANE_HEIGHT);

  // Object detections: local pane, then remote pane.
  drawDetections(objects, 0, [177, 30, 91], [255, 255, 0]);
  drawDetections(theirObjects, PANE_WIDTH, [177, 0, 91], [255, 0, 0]);

  // Eye bars: local pane, then remote pane.
  drawEyeLine(poses, 0, [0, 255, 0]);
  drawEyeLine(theirPoses, PANE_WIDTH, [255, 255, 0]);
}

/**
 * Draw a label and bounding box for every detection in one pane.
 *
 * @param {Array|undefined} detections - Items with label, x, y, w, h; the
 *   coordinates are multiplied by the pane size, so they are presumably
 *   normalised to 0..1 (confirm against the ml5 YOLO version in use).
 * @param {number} offsetX - Left edge of the pane on the canvas.
 * @param {number[]} labelColor - RGB fill for the label text.
 * @param {number[]} boxColor - RGB stroke for the box.
 */
function drawDetections(detections, offsetX, labelColor, boxColor) {
  // The list may be undefined after a failed detection; draw nothing then.
  for (const item of detections || []) {
    const left = item.x * PANE_WIDTH + offsetX;
    const top = item.y * PANE_HEIGHT;

    noStroke();
    fill(labelColor[0], labelColor[1], labelColor[2]);
    text(item.label, left, top - 5);

    noFill();
    strokeWeight(4);
    stroke(boxColor[0], boxColor[1], boxColor[2]);
    rect(left, top, item.w * PANE_WIDTH, item.h * PANE_HEIGHT);
  }
}

/**
 * Draw a thick line from the left eye to the right eye of the first pose.
 *
 * @param {Array|undefined} poseList - PoseNet results for one video.
 * @param {number} offsetX - Left edge of the pane on the canvas.
 * @param {number[]} color - RGB stroke for the line.
 */
function drawEyeLine(poseList, offsetX, color) {
  // With no pose the lookup falls back to (0, 0) for both eyes, which used
  // to paint a stray dot in the pane's top-left corner. Skip instead.
  if (!poseList || !poseList[0]) return;

  const leftEye = getPartsPosition(poseList, "leftEye");
  const rightEye = getPartsPosition(poseList, "rightEye");

  strokeWeight(10);
  stroke(color[0], color[1], color[2]);
  // Coordinates are halved to match the half-size panes.
  // NOTE(review): the remote video may not be 640x480 — confirm the scale.
  line(
    leftEye.x / 2 + offsetX,
    leftEye.y / 2,
    rightEye.x / 2 + offsetX,
    rightEye.y / 2
  );
}

/**
 * Return the position of a named keypoint on the first pose in `poses`.
 *
 * Only `poses[0]` is examined; its `pose.keypoints` entries are matched on
 * their `part` property.
 *
 * @param {Array|null|undefined} poses - Pose results; may be empty or missing.
 * @param {string} partsName - Keypoint name to look up, e.g. "leftEye".
 * @returns {{x: number, y: number}} The keypoint's `position`, or
 *   `{x: 0, y: 0}` when there is no pose, no such keypoint, or no position.
 */
function getPartsPosition(poses, partsName) {
  const firstPose = poses && poses[0];
  const keypoints = firstPose && firstPose.pose && firstPose.pose.keypoints;

  if (Array.isArray(keypoints)) {
    const match = keypoints.find((keypoint) => keypoint.part === partsName);
    if (match && match.position) {
      return match.position;
    }
  }

  // Fallback so callers can always read .x / .y.
  return {
    x: 0,
    y: 0
  };
}

// True while a detection request is in flight, so each video has at most
// one detection loop even if a loop is started more than once.
let isDetecting = false;
let isTheirDetecting = false;

/**
 * Model-ready callback: update the status text and start the detection
 * loops. Safe to call repeatedly and before the remote detector exists.
 */
function startDetecting() {
  status.html('モデルを読み込んだ');
  detect();
  theirDetect();
}

/**
 * Continuously run object detection on the local video, storing each result
 * list in `objects`. Does nothing if the detector is missing or a request
 * is already in flight.
 */
function detect() {
  if (!yolo || isDetecting) return;
  isDetecting = true;
  yolo.detect(function(err, results) {
    isDetecting = false;
    if (err) console.error(err);
    // Always keep an array here so consumers can iterate it after a failure.
    objects = results || [];
    // Detect again for the next frame.
    detect();
  });
}

/**
 * Continuously run object detection on the remote video, storing each
 * result list in `theirObjects`. Does nothing until the remote detector has
 * been created, or while a request is already in flight.
 */
function theirDetect() {
  if (!yolo_thier || isTheirDetecting) return;
  isTheirDetecting = true;
  yolo_thier.detect(function(err, results) {
    isTheirDetecting = false;
    if (err) console.error(err);
    // Always keep an array here so consumers can iterate it after a failure.
    theirObjects = results || [];
    // Detect again for the next frame.
    theirDetect();
  });
}

できなかったこと

opencvからYOLOを使うと、cat：６０％　みたいな％も出せるようで、JSでやりたかった（できるのかな？）
目を線で引くのではなくて目の画像にしたかったのですが、うまく目についてこれず、目がたくさん下に溜まっていって怖かったので実装できず。。。
いつか頑張る。

参考サイト

感想

一人でデバッグする時、ビデオチャットを二つ立ち上げるとパソコンが壊れそうなくらいファンがすごく回るので冷や冷やする。
色々なものを組み合わせるのは本当に難しい。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up